From 1f208ea67821703fd4de056ea6f0baa81f4ad4a5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:44 +1000 Subject: perf report: Fix -z option Fix a copy and paste error, -z was setting the group option. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.714204656@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cf0d21f1ae1..5c2965562c5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -675,7 +675,7 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_STRING('s', "sym-filter", &sym_filter, "pattern", "only display symbols matchig this pattern"), - OPT_BOOLEAN('z', "zero", &group, + OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, "profile at this frequency"), -- cgit v1.2.3-70-g09d2 From 6717534ddc328ae5cdf89f1ef802db83fc451f19 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:45 +1000 Subject: perf_counter tools: Remove zlib dependency The zlib devel libraries may not be installed and since we aren't using zlib we may as well remove it. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.802078956@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9c6d0ae3708..f572c90f610 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -381,12 +381,6 @@ ifndef CC_LD_DYNPATH endif endif -ifdef ZLIB_PATH - BASIC_CFLAGS += -I$(ZLIB_PATH)/include - EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) -endif -EXTLIBS += -lz - ifdef NEEDS_SOCKET EXTLIBS += -lsocket endif -- cgit v1.2.3-70-g09d2 From 2ab52083ffc057014e502cf3473adc41436922fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:46 +1000 Subject: perf top: Move skip symbols to an array Move the list of symbols we skip into an array, making it easier to add new ones. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.904782938@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5c2965562c5..731ec6d79c1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -286,11 +286,22 @@ static void *display_thread(void *arg) return NULL; } +/* Tag samples to be skipped. */ +char *skip_symbols[] = { + "default_idle", + "cpu_idle", + "enter_idle", + "exit_idle", + "mwait_idle", + NULL +}; + static int symbol_filter(struct dso *self, struct symbol *sym) { static int filter_match; struct sym_entry *syme; const char *name = sym->name; + int i; if (!strcmp(name, "_text") || !strcmp(name, "_etext") || @@ -302,13 +313,12 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; syme = dso__sym_priv(self, sym); - /* Tag samples to be skipped. 
*/ - if (!strcmp("default_idle", name) || - !strcmp("cpu_idle", name) || - !strcmp("enter_idle", name) || - !strcmp("exit_idle", name) || - !strcmp("mwait_idle", name)) - syme->skip = 1; + for (i = 0; skip_symbols[i]; i++) { + if (!strcmp(skip_symbols[i], name)) { + syme->skip = 1; + break; + } + } if (filter_match == 1) { filter_end = sym->start; -- cgit v1.2.3-70-g09d2 From 3a3393ef75a14ae259a82f3f38624efa17884168 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:47 +1000 Subject: perf top: Add ppc64 specific skip symbols and strip ppc64 . prefix Filter out some ppc64 specific idle loop functions and remove leading '.' on ppc64 text symbols. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.995643441@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 731ec6d79c1..0506cd6e04c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -293,6 +293,8 @@ char *skip_symbols[] = { "enter_idle", "exit_idle", "mwait_idle", + "ppc64_runlatch_off", + "pseries_dedicated_idle_sleep", NULL }; @@ -303,6 +305,13 @@ static int symbol_filter(struct dso *self, struct symbol *sym) const char *name = sym->name; int i; + /* + * ppc64 uses function descriptors and appends a '.' to the + * start of every instruction address. Remove it. + */ + if (name[0] == '.') + name++; + if (!strcmp(name, "_text") || !strcmp(name, "_etext") || !strcmp(name, "_sinittext") || -- cgit v1.2.3-70-g09d2 From d8db1b57d31a6b30ea2f0df318eab50fc92b38d6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:48 +1000 Subject: perf report: Fix reporting of hypervisor PERF_EVENT_MISC_* is not a bitmask, so we have to mask and compare. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.088394681@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 135b7837e6b..88e88c510ae 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1213,6 +1213,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; + int cpumode; if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; @@ -1256,7 +1257,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + + if (cpumode == PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1264,7 +1267,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dprintf(" ...... dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_EVENT_MISC_USER) { show = SHOW_USER; level = '.'; -- cgit v1.2.3-70-g09d2 From fb9c818873a788c5c01c9868cc6050df96e2c7df Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:49 +1000 Subject: perf report: Add hypervisor dso Add a dso for hypervisor samples. 
We don't get any symbol information on the ppc64 hypervisor but this at least gives us a high level summary of the time spent in there. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.182536873@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 88e88c510ae..3f5d8ea05ff 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -121,6 +121,7 @@ typedef union event_union { static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; +static struct dso *hypervisor_dso; static void dsos__add(struct dso *dso) { @@ -202,6 +203,11 @@ static int load_kernel(void) dsos__add(vdso); + hypervisor_dso = dso__new("[hypervisor]", 0); + if (!hypervisor_dso) + return -1; + dsos__add(hypervisor_dso); + return err; } @@ -640,7 +646,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self) if (self->sym) { ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : '.', self->sym->name); + self->dso == kernel_dso ? 'k' : + self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); } @@ -963,6 +970,9 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1275,6 +1285,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } else { show = SHOW_HV; level = 'H'; + + dso = hypervisor_dso; + dprintf(" ...... dso: [hypervisor]\n"); } -- cgit v1.2.3-70-g09d2 From 9198aa77b69647d1d91207f8075763abe7dc0bf4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:13 +0200 Subject: perf_counter tools: Fix storage size allocation of callchain list Fix a confusion while giving the size of a callchain list during its allocation. We are using the wrong structure size. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ad3c2857896..bbf7813fefe 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -74,7 +74,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) for (i = start; i < chain->nr; i++) { struct callchain_list *call; - call = malloc(sizeof(*chain)); + call = malloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); return; -- cgit v1.2.3-70-g09d2 From 4424961ad6621a02c6b4c9093e801002c1bb9f65 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:14 +0200 Subject: perf_counter tools: Resolve symbols in callchains This patch resolves the names, when possible, of each ip present in the callchains while using the -c option with perf report. 
Example: 5.40% [k] __d_lookup 5.37% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat sys_access system_call_fastpath 0x7fb609846f77 0.01% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 102 +++++++++++++++++++++++++++++--------------- tools/perf/util/callchain.c | 33 ++++++++------ tools/perf/util/callchain.h | 5 ++- 3 files changed, 90 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3f5d8ea05ff..197793051fa 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -794,8 +794,15 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += callchain__fprintf(fp, self->parent, total_samples); - list_for_each_entry(chain, &self->val, list) - ret += fprintf(fp, " %p\n", (void *)chain->ip); + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->sym) + ret += fprintf(fp, " %s\n", chain->sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)chain->ip); + } return ret; } @@ -930,6 +937,55 @@ static int call__match(struct symbol *sym) return 0; } +static struct symbol ** +resolve_callchain(struct thread *thread, struct map *map, + struct ip_callchain *chain, struct hist_entry *entry) +{ + int i; + struct symbol **syms; + u64 context = PERF_CONTEXT_MAX; + + if (callchain) { + syms = calloc(chain->nr, sizeof(*syms)); + if (!syms) { + fprintf(stderr, "Can't allocate memory for symbols\n"); + exit(-1); + } + } + + for (i = 0; i < chain->nr; i++) { + u64 ip = chain->ips[i]; + struct dso *dso = NULL; + struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } + + sym = resolve_symbol(thread, NULL, &dso, &ip); + + if (sym) { + if (sort__has_parent && call__match(sym) && + !entry->parent) + entry->parent = sym; + if (!callchain) + break; + syms[i] = sym; + } + } + + return syms; +} + /* * collect histogram counts */ @@ -942,6 +998,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; + struct symbol **syms = NULL; struct hist_entry entry = { .thread = thread, .map = map, @@ -955,39 +1012,11 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - if (sort__has_parent && chain) { - u64 context = PERF_CONTEXT_MAX; - int i; - - for (i = 0; i < chain->nr; i++) { - u64 ip = chain->ips[i]; - struct dso *dso = NULL; - struct symbol *sym; - - if (ip >= PERF_CONTEXT_MAX) { - context = ip; - continue; - } - - switch (context) { case PERF_CONTEXT_HV: dso = hypervisor_dso; break; - case PERF_CONTEXT_KERNEL: - dso = kernel_dso; - break; - default: - break; - } - - sym = resolve_symbol(thread, NULL, &dso, &ip); - - if (sym && 
call__match(sym)) { - entry.parent = sym; - break; - } - } - } + if ((sort__has_parent || callchain) && chain) + syms = resolve_callchain(thread, map, chain, &entry); while (*p != NULL) { parent = *p; @@ -997,8 +1026,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; - if (callchain) - append_chain(&he->callchain, chain); + if (callchain) { + append_chain(&he->callchain, chain, syms); + free(syms); + } return 0; } @@ -1014,7 +1045,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, *he = entry; if (callchain) { callchain_init(&he->callchain); - append_chain(&he->callchain, chain); + append_chain(&he->callchain, chain, syms); + free(syms); } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index bbf7813fefe..6568cb198ba 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -67,7 +67,8 @@ static struct callchain_node *create_child(struct callchain_node *parent) } static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, + struct symbol **syms) { int i; @@ -80,24 +81,26 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) return; } call->ip = chain->ips[i]; + call->sym = syms[i]; list_add_tail(&call->list, &node->val); } node->val_nr = i - start; } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +static void add_child(struct callchain_node *parent, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *new; new = create_child(parent); - fill_node(new, chain, parent->val_nr); + fill_node(new, chain, parent->val_nr, syms); new->hit = 1; } static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx) + struct callchain_list *to_split, int idx, struct symbol **syms) { struct callchain_node *new; @@ -109,21 +112,22 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, parent->val_nr = idx; /* create the new one */ - add_child(parent, chain); + add_child(parent, chain, syms); } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start); + int start, struct symbol **syms); static int -__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr); + int ret = __append_chain(rnode, chain, root->val_nr, syms); if (!ret) return 0; } @@ -132,7 +136,7 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain) static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start) + int start, struct symbol **syms) { struct callchain_list *cnode; int i = start; @@ -154,7 +158,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match only a part of the node. 
Split it and add the new chain */ if (i < root->val_nr) { - split_add_child(root, chain, cnode, i); + split_add_child(root, chain, cnode, i, syms); return 0; } @@ -164,11 +168,12 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return 0; } - return __append_chain_children(root, chain); + return __append_chain_children(root, chain, syms); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain) +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { - if (__append_chain_children(root, chain) == -1) - add_child(root, chain); + if (__append_chain_children(root, chain, syms) == -1) + add_child(root, chain, syms); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fa1cd2f71fd..c942daa712e 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,6 +4,7 @@ #include "../perf.h" #include "list.h" #include "rbtree.h" +#include "symbol.h" struct callchain_node { @@ -18,6 +19,7 @@ struct callchain_node { struct callchain_list { unsigned long ip; + struct symbol *sym; struct list_head list; }; @@ -28,6 +30,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms); void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); #endif -- cgit v1.2.3-70-g09d2 From deac911cbdcb124fa0cee47c588e0cb0400b23b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:15 +0200 Subject: perf_counter tools: Various fixes for callchains The symbol resolving has of course revealed some bugs in the callchain tree handling. This patch fixes some of them, including: - inherit the children from the parents while splitting a node - fix list range moving - fix indexes setting in callchains - create a child on the current node if the path doesn't match in the existent children (was only done on the root) - compare using symbols when possible so that we can match a function using any ip inside by referring to its start address. The practical effects are: - remove double callchains - fix upside down or any random order of callchains - fix wrong paths - fix bad hits and percentage accounts Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 122 ++++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 6568cb198ba..440db12c635 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -4,6 +4,9 @@ * Handle the callchains from the stream in an ad-hoc radix tree and then * sort them in an rbtree. * + * Using a radix for code path provides a fast retrieval and factorizes + * memory use. Also that lets us use the paths in a hierarchical graph view. 
+ * */ #include @@ -14,7 +17,8 @@ #include "callchain.h" -static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +static void +rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -49,7 +53,12 @@ void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) rb_insert_callchain(rb_root, node); } -static struct callchain_node *create_child(struct callchain_node *parent) +/* + * Create a child for a parent. If inherit_children, then the new child + * will become the new parent of it's parent children + */ +static struct callchain_node * +create_child(struct callchain_node *parent, bool inherit_children) { struct callchain_node *new; @@ -61,14 +70,27 @@ static struct callchain_node *create_child(struct callchain_node *parent) new->parent = parent; INIT_LIST_HEAD(&new->children); INIT_LIST_HEAD(&new->val); + + if (inherit_children) { + struct callchain_node *next; + + list_splice(&parent->children, &new->children); + INIT_LIST_HEAD(&parent->children); + + list_for_each_entry(next, &new->children, brothers) + next->parent = new; + } list_add_tail(&new->brothers, &parent->children); return new; } +/* + * Fill the node with callchain values + */ static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, - struct symbol **syms) +fill_node(struct callchain_node *node, struct ip_callchain *chain, + int start, struct symbol **syms) { int i; @@ -84,54 +106,80 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, call->sym = syms[i]; list_add_tail(&call->list, &node->val); } - node->val_nr = i - start; + node->val_nr = chain->nr - start; + if (!node->val_nr) + printf("Warning: empty node in callchain tree\n"); } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct symbol **syms) +static void +add_child(struct callchain_node *parent, struct ip_callchain *chain, + int start, struct symbol **syms) { struct callchain_node *new; - new = create_child(parent); - fill_node(new, chain, parent->val_nr, syms); + new = create_child(parent, false); + fill_node(new, chain, start, syms); new->hit = 1; } +/* + * Split the parent in two parts (a new child is created) and + * give a part of its callchain to the created child. 
+ * Then create another child to host the given callchain of new branch + */ static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx, struct symbol **syms) + struct callchain_list *to_split, int idx_parents, int idx_local, + struct symbol **syms) { struct callchain_node *new; + struct list_head *old_tail; + int idx_total = idx_parents + idx_local; /* split */ - new = create_child(parent); - list_move_tail(&to_split->list, &new->val); - new->hit = parent->hit; - parent->hit = 0; - parent->val_nr = idx; + new = create_child(parent, true); + + /* split the callchain and move a part to the new child */ + old_tail = parent->val.prev; + list_del_range(&to_split->list, old_tail); + new->val.next = &to_split->list; + new->val.prev = old_tail; + to_split->list.prev = &new->val; + old_tail->next = &new->val; - /* create the new one */ - add_child(parent, chain, syms); + /* split the hits */ + new->hit = parent->hit; + new->val_nr = parent->val_nr - idx_local; + parent->val_nr = idx_local; + + /* create a new child for the new branch if any */ + if (idx_total < chain->nr) { + parent->hit = 0; + add_child(parent, chain, idx_total, syms); + } else { + parent->hit = 1; + } } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, int start, struct symbol **syms); -static int +static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms) + struct symbol **syms, int start) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr, syms); + int ret = __append_chain(rnode, chain, start, syms); if (!ret) - return 0; + return; } - return -1; + /* nothing in children, add to the current node */ + add_child(root, chain, start, syms); } static int @@ -142,14 +190,22 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, int i = start; bool found = false; - /* lookup in the current node */ + /* + * Lookup in the current node + * If we have a symbol, then compare the start to match + * anywhere inside a function. + */ list_for_each_entry(cnode, &root->val, list) { - if (cnode->ip != chain->ips[i++]) + if (i == chain->nr) + break; + if (cnode->sym && syms[i]) { + if (cnode->sym->start != syms[i]->start) + break; + } else if (cnode->ip != chain->ips[i]) break; if (!found) found = true; - if (i == chain->nr) - break; + i++; } /* matches not, relay on the parent */ @@ -157,23 +213,25 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return -1; /* we match only a part of the node. 
Split it and add the new chain */ - if (i < root->val_nr) { - split_add_child(root, chain, cnode, i, syms); + if (i - start < root->val_nr) { + split_add_child(root, chain, cnode, start, i - start, syms); return 0; } /* we match 100% of the path, increment the hit */ - if (i == root->val_nr) { + if (i - start == root->val_nr && i == chain->nr) { root->hit++; return 0; } - return __append_chain_children(root, chain, syms); + /* We match the node and still have a part remaining */ + __append_chain_children(root, chain, syms, i); + + return 0; } void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { - if (__append_chain_children(root, chain, syms) == -1) - add_child(root, chain, syms); + __append_chain_children(root, chain, syms, 0); } -- cgit v1.2.3-70-g09d2 From 61c45981ddbd718136d49464f00d2f11938aaa6e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 1 Jul 2009 13:04:34 +1000 Subject: perf_counter tools: Rework event string parsing/syntax This reworks the parser for event descriptors to make it more consistent in what it accepts. It is now structured as a recursive descent parser for the following grammar: events ::= event ( ("," | space) space* event )* event ::= ( raw_event | numeric_event | symbolic_event | generic_hw_event ) [ event_modifier ] raw_event ::= "r" hex_number numeric_event ::= number ":" number number ::= decimal_number | "0x" hex_number | "0" octal_number symbolic_event ::= string_from_event_symbols_array generic_hw_event::= cache_type ( "-" ( cache_op | cache_result ) )* event_modifier ::= ":" ( "u" | "k" | "h" )+ with the extra restriction that you can have at most one cache_op and at most one cache_result. We pass the current string pointer by reference (i.e. as a const char **) to the various parsing functions so that they can advance the pointer to indicate how much they consumed. They return 0 if they didn't recognize the thing at the pointer or 1 if they did (and advance the pointer past it). This also fixes parse_aliases to take the longest matching alias from the table, not the first one. Otherwise "l1-data" would match the "l1-d" alias and the "ata" would not be consumed. This allows event modifiers indicating what processor modes to count in to be applied to any event, not just numeric events, and adds a ":h" modifier to indicate counting in hypervisor mode. Specifying ":u" now sets both exclude_kernel and exclude_hv, and so on. Multiple modes can be specified, e.g. ":uk" will count in user or hypervisor mode (i.e. only exclude_kernel will be set). 
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <19018.53826.843815.189847@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 232 ++++++++++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 72 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4d042f104cd..e6b83a3311a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -184,16 +184,20 @@ char *event_name(int counter) return "unknown"; } -static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) +static int parse_aliases(const char **str, char *names[][MAX_ALIASES], int size) { int i, j; + int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES; j++) { - if (!names[i][j]) - break; - if (strcasestr(str, names[i][j])) - return i; + for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { + n = strlen(names[i][j]); + if (n > longest && !strncasecmp(*str, names[i][j], n)) + longest = n; + } + if (longest > 0) { + *str += longest; + return i; } } @@ -201,30 +205,53 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) } static int -parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) { - int cache_type = -1, cache_op = 0, cache_result = 0; + const char *s = *str; + int cache_type = -1, cache_op = -1, cache_result = -1; - cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(&s, hw_cache, PERF_COUNT_HW_CACHE_MAX); /* * No fallback - if we cannot get a clear cache type * then bail out: */ if (cache_type == -1) - return -EINVAL; + return 0; + + while ((cache_op == -1 || cache_result == -1) && *s == '-') { + ++s; + + if (cache_op == -1) { + cache_op = parse_aliases(&s, hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX); + if (cache_op >= 0) { + if (!is_cache_op_valid(cache_type, cache_op)) + return 0; + continue; + } + } + + if (cache_result == -1) { + cache_result = parse_aliases(&s, hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); + if (cache_result >= 0) + continue; + } + + /* + * Can't parse this as a cache op or result, so back up + * to the '-'. 
+ */ + --s; + break; + } - cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); /* * Fall back to reads: */ if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; - if (!is_cache_op_valid(cache_type, cache_op)) - return -EINVAL; - - cache_result = parse_aliases(str, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); /* * Fall back to accesses: */ @@ -234,93 +261,154 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) attr->config = cache_type | (cache_op << 8) | (cache_result << 16); attr->type = PERF_TYPE_HW_CACHE; - return 0; + *str = s; + return 1; } static int check_events(const char *str, unsigned int i) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return 1; + int n; - if (strlen(event_symbols[i].alias)) - if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) - return 1; + n = strlen(event_symbols[i].symbol); + if (!strncmp(str, event_symbols[i].symbol, n)) + return n; + + n = strlen(event_symbols[i].alias); + if (n) + if (!strncmp(str, event_symbols[i].alias, n)) + return n; return 0; } -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) +static int +parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) { - u64 config, id; - int type; + const char *str = *strp; unsigned int i; - const char *sep, *pstr; + int n; - if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { - attr->type = PERF_TYPE_RAW; - attr->config = config; + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + n = check_events(str, i); + if (n > 0) { + attr->type = event_symbols[i].type; + attr->config = event_symbols[i].config; + *strp = str + n; + return 1; + } + } + return 0; +} + +static int parse_raw_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + u64 config; + int n; + if (*str != 'r') return 0; + n = hex2u64(str + 1, &config); + if (n > 0) { + *strp = str + n + 1; + attr->type = PERF_TYPE_RAW; + attr->config = config; + return 1; } + return 0; +} - pstr = str; - sep = strchr(pstr, ':'); - if (sep) { - type = atoi(pstr); - pstr = sep + 1; - id = atoi(pstr); - sep = strchr(pstr, ':'); - if (sep) { - pstr = sep + 1; - if (strchr(pstr, 'k')) - attr->exclude_user = 1; - if (strchr(pstr, 'u')) - attr->exclude_kernel = 1; +static int +parse_numeric_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + char *endp; + unsigned long type; + u64 config; + + type = strtoul(str, &endp, 0); + if (endp > str && type < PERF_TYPE_MAX && *endp == ':') { + str = endp + 1; + config = strtoul(str, &endp, 0); + if (endp > str) { + attr->type = type; + attr->config = config; + *strp = endp; + return 1; } - attr->type = type; - attr->config = id; + } + return 0; +} + +static int +parse_event_modifier(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + int eu = 1, ek = 1, eh = 1; + if (*str++ != ':') return 0; + while (*str) { + if (*str == 'u') + eu = 0; + else if (*str == 'k') + ek = 0; + else if (*str == 'h') + eh = 0; + else + break; + ++str; } + if (str >= *strp + 2) { + *strp = str; + attr->exclude_user = eu; + attr->exclude_kernel = ek; + attr->exclude_hv = eh; + return 1; + } + return 0; +} - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (check_events(str, i)) { - attr->type = event_symbols[i].type; - attr->config = event_symbols[i].config; +/* + * Each 
event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) +{ + if (!(parse_raw_event(str, attr) || + parse_numeric_event(str, attr) || + parse_symbolic_event(str, attr) || + parse_generic_hw_event(str, attr))) + return 0; - return 0; - } - } + parse_event_modifier(str, attr); - return parse_generic_hw_symbols(str, attr); + return 1; } int parse_events(const struct option *opt, const char *str, int unset) { struct perf_counter_attr attr; - int ret; - memset(&attr, 0, sizeof(attr)); -again: - if (nr_counters == MAX_COUNTERS) - return -1; + for (;;) { + if (nr_counters == MAX_COUNTERS) + return -1; + + memset(&attr, 0, sizeof(attr)); + if (!parse_event_symbols(&str, &attr)) + return -1; - ret = parse_event_symbols(str, &attr); - if (ret < 0) - return ret; + if (!(*str == 0 || *str == ',' || isspace(*str))) + return -1; - attrs[nr_counters] = attr; - nr_counters++; + attrs[nr_counters] = attr; + nr_counters++; - str = strstr(str, ","); - if (str) { - str++; - goto again; + if (*str == 0) + break; + if (*str == ',') + ++str; + while (isspace(*str)) + ++str; } return 0; -- cgit v1.2.3-70-g09d2 From 88a69dfbc6ab1e3b51bba8c9103055e21089ebb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 11:17:20 +0200 Subject: perf report: Fix HV bit mismerge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: builtin-report.c: In function ‘hist_entry__add’: builtin-report.c:1015: error: case label not within a switch statement builtin-report.c:1017: error: break statement not within loop or switch Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 197793051fa..7d2b49adcdc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -964,6 +964,9 @@ resolve_callchain(struct thread *thread, struct map *map, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1012,9 +1015,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - case PERF_CONTEXT_HV: - dso = hypervisor_dso; - break; if ((sort__has_parent || callchain) && chain) syms = resolve_callchain(thread, map, chain, &entry); -- cgit v1.2.3-70-g09d2 From f37a291c527c954df4da568de718ebb36b8261c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 12:37:06 +0200 Subject: perf_counter tools: Add more warnings and fix/annotate them Enable -Wextra. This found a few real bugs plus a number of signed/unsigned type mismatches/uncleanlinesses. It also required a few annotations All things considered it was still worth it so lets try with this enabled for now. 
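A minimal standalone sketch (hypothetical example, not code from this patch) of why the __used annotation added to perf.h matters once -Wextra and -Werror are both enabled: it is what stops intentionally-ignored callback parameters from tripping -Wunused-parameter.

  #include <stdio.h>

  #define __used __attribute__((__unused__))

  /* Many option/config callbacks ignore some of their arguments on
   * purpose; marking those parameters __used keeps a build with
   * "gcc -Wall -Wextra -Werror" clean. */
  static int example_cb(const char *var, const char *value __used, void *cb __used)
  {
          printf("config: %s\n", var);
          return 0;
  }

  int main(void)
  {
          return example_cb("core.example", "1", NULL);
  }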
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- tools/perf/builtin-annotate.c | 12 +++++------ tools/perf/builtin-help.c | 6 ++++-- tools/perf/builtin-list.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-report.c | 22 ++++++++++---------- tools/perf/builtin-stat.c | 18 +++++++++------- tools/perf/builtin-top.c | 8 +++---- tools/perf/perf.c | 5 +---- tools/perf/perf.h | 2 ++ tools/perf/util/alias.c | 2 +- tools/perf/util/cache.h | 1 + tools/perf/util/callchain.c | 15 +++++++------- tools/perf/util/callchain.h | 10 ++++----- tools/perf/util/color.c | 10 ++++++--- tools/perf/util/config.c | 18 +++++++++------- tools/perf/util/exec_cmd.c | 5 ++++- tools/perf/util/help.c | 26 +++++++++++++---------- tools/perf/util/help.h | 6 +++--- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-options.c | 2 +- tools/perf/util/parse-options.h | 25 +++++++++++----------- tools/perf/util/quote.c | 46 ++++++++++++++++++++++------------------- tools/perf/util/quote.h | 2 +- tools/perf/util/strbuf.c | 13 ++++++------ tools/perf/util/strbuf.h | 10 ++++----- tools/perf/util/wrapper.c | 5 +++-- 27 files changed, 151 insertions(+), 128 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f572c90f610..eddf076b19d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -164,7 +164,7 @@ endif # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 722c0f54e54..6cba70daf12 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -160,7 +160,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -203,7 +203,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -600,7 +600,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -1069,7 +1069,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) static const char *prev_color; unsigned int offset; size_t line_len; - u64 line_ip; + s64 line_ip; int ret; char *c; @@ -1428,7 +1428,7 @@ more: head += size; - if (offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; rc = EXIT_SUCCESS; @@ -1492,7 +1492,7 @@ static void setup_sorting(void) free(str); } -int cmd_annotate(int argc, const char **argv, const char *prefix) +int cmd_annotate(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 0f32dc3f3c4..2599d86a733 100644 --- 
a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -3,6 +3,7 @@ * * Builtin help command */ +#include "perf.h" #include "util/cache.h" #include "builtin.h" #include "util/exec_cmd.h" @@ -277,7 +278,7 @@ static struct cmdnames main_cmds, other_cmds; void list_common_cmds_help(void) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { if (longest < strlen(common_cmds[i].name)) @@ -415,9 +416,10 @@ static void show_html_page(const char *perf_cmd) open_html(page_path.buf); } -int cmd_help(int argc, const char **argv, const char *prefix) +int cmd_help(int argc, const char **argv, const char *prefix __used) { const char *alias; + load_command_list("perf-", &main_cmds, &other_cmds); perf_config(perf_help_config, NULL); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index fe60e37c96e..f990fa8a35c 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -13,7 +13,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" -int cmd_list(int argc, const char **argv, const char *prefix) +int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) { print_events(); return 0; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d18546f37d7..4ef78a5e6f3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -294,7 +294,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header.type = PERF_EVENT_MMAP, + .header = { .type = PERF_EVENT_MMAP }, }; int n; size_t size; @@ -650,7 +650,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_record(int argc, const char **argv, const char *prefix) +int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7d2b49adcdc..007363db3b1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -177,7 +177,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -239,7 +239,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -712,7 +712,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -801,7 +801,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += fprintf(fp, " %s\n", chain->sym->name); else ret += fprintf(fp, " %p\n", - (void *)chain->ip); + (void *)(long)chain->ip); } return ret; @@ -938,12 +938,12 @@ static int call__match(struct symbol *sym) } static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map, +resolve_callchain(struct thread *thread, struct map *map __used, struct ip_callchain *chain, struct hist_entry *entry) { - int i; - struct symbol **syms; u64 context = PERF_CONTEXT_MAX; + struct symbol **syms; + unsigned int i; if (callchain) { syms = calloc(chain->nr, sizeof(*syms)); @@ -1183,7 +1183,7 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) fprintf(fp, "# ........"); list_for_each_entry(se, 
&hist_entry__sort_list, list) { - int i; + unsigned int i; if (exclude_other && (se == &sort_parent)) continue; @@ -1271,7 +1271,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (long long)period); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int i; + unsigned int i; chain = (void *)more_data; @@ -1667,7 +1667,7 @@ more: if (offset + head >= header->data_offset + header->data_size) goto done; - if (offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; done: @@ -1756,7 +1756,7 @@ static void setup_list(struct strlist **list, const char *list_str, } } -int cmd_report(int argc, const char **argv, const char *prefix) +int cmd_report(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e03524a1de..095a90e012a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -64,7 +64,7 @@ static struct perf_counter_attr default_attrs[] = { static int system_wide = 0; static int verbose = 0; -static int nr_cpus = 0; +static unsigned int nr_cpus = 0; static int run_idx = 0; static int run_count = 1; @@ -108,7 +108,8 @@ static void create_perf_stat_counter(int counter, int pid) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { - int cpu; + unsigned int cpu; + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) @@ -150,8 +151,8 @@ static inline int nsec_counter(int counter) static void read_counter(int counter) { u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; + unsigned int cpu; + size_t res, nv; int scaled; count = event_res[run_idx][counter]; @@ -165,6 +166,7 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -200,7 +202,7 @@ static void read_counter(int counter) runtime_cycles[run_idx] = count[0]; } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; int status = 0; @@ -390,7 +392,7 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - if (event_scaled[i][j] != -1) + if (event_scaled[i][j] != (u64)-1) update_avg("scaled", j, event_scaled_avg + j, event_scaled[i]+j); else @@ -510,7 +512,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_stat(int argc, const char **argv, const char *prefix) +int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; @@ -528,7 +530,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); + assert((int)nr_cpus >= 0); /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0506cd6e04c..5f5e7df8302 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -269,7 +269,7 @@ static void print_sym_table(void) } } -static void *display_thread(void *arg) +static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; int delay_msecs = delay_secs * 1000; @@ -287,7 +287,7 @@ static void *display_thread(void *arg) } /* Tag samples to be skipped. 
*/ -char *skip_symbols[] = { +static const char *skip_symbols[] = { "default_idle", "cpu_idle", "enter_idle", @@ -426,7 +426,7 @@ static void process_event(u64 ip, int counter, int user) struct mmap_data { int counter; void *base; - unsigned int mask; + int mask; unsigned int prev; }; @@ -705,7 +705,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_top(int argc, const char **argv, const char *prefix) +int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb72593370..c5656784c61 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -229,9 +229,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); - if (p->option & NEED_WORK_TREE) - /* setup_work_tree() */; - status = p->fn(argc, argv, prefix); if (status) return status & 0xff; @@ -266,7 +263,7 @@ static void handle_internal_command(int argc, const char **argv) { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, }; - int i; + unsigned int i; static const char ext[] = STRIP_EXTENSION; if (sizeof(ext) > 1) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ce394192c85..27887c91643 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,6 +52,8 @@ static inline unsigned long long rdclock(void) #define __user #define asmlinkage +#define __used __attribute__((__unused__)) + #define unlikely(x) __builtin_expect(!!(x), 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index 9b3dd2b428d..b8144e80bb1 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -3,7 +3,7 @@ static const char *alias_key; static char *alias_val; -static int alias_lookup_cb(const char *k, const char *v, void *cb) +static int alias_lookup_cb(const char *k, const char *v, void *cb __used) { if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { if (!v) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 393d6146d13..161d5f413e2 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -3,6 +3,7 @@ #include "util.h" #include "strbuf.h" +#include "../perf.h" #define PERF_DIR_ENVIRONMENT "PERF_DIR" #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 440db12c635..3dceabd9b5e 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -92,7 +92,7 @@ static void fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, struct symbol **syms) { - int i; + unsigned int i; for (i = start; i < chain->nr; i++) { struct callchain_list *call; @@ -135,7 +135,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, { struct callchain_node *new; struct list_head *old_tail; - int idx_total = idx_parents + idx_local; + unsigned int idx_total = idx_parents + idx_local; /* split */ new = create_child(parent, true); @@ -164,17 +164,18 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms); + unsigned int start, struct symbol **syms); static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms, int start) + struct symbol **syms, unsigned int start) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret 
= __append_chain(rnode, chain, start, syms); + unsigned int ret = __append_chain(rnode, chain, start, syms); + if (!ret) return; } @@ -184,10 +185,10 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms) + unsigned int start, struct symbol **syms) { struct callchain_list *cnode; - int i = start; + unsigned int i = start; bool found = false; /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c942daa712e..251d99ecd22 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -10,15 +10,15 @@ struct callchain_node { struct callchain_node *parent; struct list_head brothers; - struct list_head children; - struct list_head val; + struct list_head children; + struct list_head val; struct rb_node rb_node; - int val_nr; - int hit; + unsigned int val_nr; + u64 hit; }; struct callchain_list { - unsigned long ip; + u64 ip; struct symbol *sym; struct list_head list; }; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 9a8c20ccc53..26f82318b86 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -11,7 +11,8 @@ static int parse_color(const char *name, int len) }; char *end; int i; - for (i = 0; i < ARRAY_SIZE(color_names); i++) { + + for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { const char *str = color_names[i]; if (!strncasecmp(name, str, len) && !str[len]) return i - 1; @@ -28,7 +29,8 @@ static int parse_attr(const char *name, int len) static const char * const attr_names[] = { "bold", "dim", "ul", "blink", "reverse" }; - int i; + unsigned int i; + for (i = 0; i < ARRAY_SIZE(attr_names); i++) { const char *str = attr_names[i]; if (!strncasecmp(name, str, len) && !str[len]) @@ -222,10 +224,12 @@ int color_fwrite_lines(FILE *fp, const char *color, { if (!*color) return fwrite(buf, count, 1, fp) != 1; + while (count) { char *p = memchr(buf, '\n', count); + if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? p - buf : count, 1, fp) != 1 || + fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || fputs(PERF_COLOR_RESET, fp) < 0)) return -1; if (!p) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3dd13faa6a2..780df541006 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -47,10 +47,12 @@ static int get_next_char(void) static char *parse_value(void) { static char value[1024]; - int quote = 0, comment = 0, len = 0, space = 0; + int quote = 0, comment = 0, space = 0; + size_t len = 0; for (;;) { int c = get_next_char(); + if (len >= sizeof(value) - 1) return NULL; if (c == '\n') { @@ -353,13 +355,13 @@ int perf_config_string(const char **dest, const char *var, const char *value) return 0; } -static int perf_default_core_config(const char *var, const char *value) +static int perf_default_core_config(const char *var __used, const char *value __used) { /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } -int perf_default_config(const char *var, const char *value, void *dummy) +int perf_default_config(const char *var, const char *value, void *dummy __used) { if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); @@ -471,10 +473,10 @@ static int matches(const char* key, const char* value) !regexec(store.value_regex, value, 0, NULL, 0))); } -static int store_aux(const char* key, const char* value, void *cb) +static int store_aux(const char* key, const char* value, void *cb __used) { + int section_len; const char *ep; - size_t section_len; switch (store.state) { case KEY_SEEN: @@ -551,7 +553,7 @@ static int store_write_section(int fd, const char* key) strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); } - success = write_in_full(fd, sb.buf, sb.len) == sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -599,7 +601,7 @@ static int store_write_pair(int fd, const char* key, const char* value) } strbuf_addf(&sb, "%s\n", quote); - success = write_in_full(fd, sb.buf, sb.len) == sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -741,7 +743,7 @@ int perf_config_set_multivar(const char* key, const char* value, } else { struct stat st; char* contents; - size_t contents_sz, copy_begin, copy_end; + ssize_t contents_sz, copy_begin, copy_end; int i, new_line = 0; if (value_regex == NULL) diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c index d3929226315..34a35286738 100644 --- a/tools/perf/util/exec_cmd.c +++ b/tools/perf/util/exec_cmd.c @@ -1,6 +1,9 @@ #include "cache.h" #include "exec_cmd.h" #include "quote.h" + +#include + #define MAX_ARGS 32 extern char **environ; @@ -51,7 +54,7 @@ const char *perf_extract_argv0_path(const char *argv0) slash--; if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); + argv0_path = xstrndup(argv0, slash - argv0); return slash + 1; } diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 17a00e0df2c..fbb00978b2e 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -26,7 +26,7 @@ static int term_columns(void) return 80; } -void add_cmdname(struct cmdnames *cmds, const char *name, int len) +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { struct cmdname *ent = malloc(sizeof(*ent) + len + 1); @@ -40,7 +40,8 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len) static void clean_cmdnames(struct cmdnames *cmds) { - int i; + unsigned int i; + for (i = 0; i < cmds->cnt; ++i) free(cmds->names[i]); free(cmds->names); @@ -57,7 +58,7 @@ static int cmdname_compare(const void *a_, const void *b_) static void uniq(struct cmdnames *cmds) { - int i, j; + unsigned int i, j; if (!cmds->cnt) return; @@ -71,7 +72,7 @@ static void uniq(struct cmdnames *cmds) void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) { - int ci, cj, ei; + size_t ci, cj, ei; int cmp; ci = cj = ei = 0; @@ -106,8 +107,9 @@ static void pretty_print_string_list(struct cmdnames *cmds, int longest) printf(" "); for (j = 0; j < cols; j++) { - int n = j * rows + i; - int size = space; + unsigned int n = j * rows + i; + unsigned int size = space; + if (n >= cmds->cnt) break; if (j == cols-1 || n + rows >= cmds->cnt) @@ -208,7 +210,7 @@ void load_command_list(const char *prefix, void list_commands(const char *title, struct cmdnames *main_cmds, struct cmdnames *other_cmds) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < 
main_cmds->cnt; i++) if (longest < main_cmds->names[i]->len) @@ -239,7 +241,8 @@ void list_commands(const char *title, struct cmdnames *main_cmds, int is_in_cmdlist(struct cmdnames *c, const char *s) { - int i; + unsigned int i; + for (i = 0; i < c->cnt; i++) if (!strcmp(s, c->names[i]->name)) return 1; @@ -271,7 +274,8 @@ static int levenshtein_compare(const void *p1, const void *p2) static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) { - int i; + unsigned int i; + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); for (i = 0; i < old->cnt; i++) @@ -283,7 +287,7 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) const char *help_unknown_cmd(const char *cmd) { - int i, n = 0, best_similarity = 0; + unsigned int i, n = 0, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); @@ -345,7 +349,7 @@ const char *help_unknown_cmd(const char *cmd) exit(1); } -int cmd_version(int argc, const char **argv, const char *prefix) +int cmd_version(int argc __used, const char **argv __used, const char *prefix __used) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h index 56bc15406ff..7128783637b 100644 --- a/tools/perf/util/help.h +++ b/tools/perf/util/help.h @@ -2,8 +2,8 @@ #define HELP_H struct cmdnames { - int alloc; - int cnt; + size_t alloc; + size_t cnt; struct cmdname { size_t len; /* also used for similarity index in help.c */ char name[FLEX_ARRAY]; @@ -19,7 +19,7 @@ static inline void mput_char(char c, unsigned int num) void load_command_list(const char *prefix, struct cmdnames *main_cmds, struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, int len); +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len); /* Here we require that excludes is a sorted list. 
*/ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); int is_in_cmdlist(struct cmdnames *c, const char *s); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e6b83a3311a..aed70901df2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -385,7 +385,7 @@ static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) return 1; } -int parse_events(const struct option *opt, const char *str, int unset) +int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_counter_attr attr; diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b3affb1658d..9a897b7cce7 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -485,7 +485,7 @@ int parse_options_usage(const char * const *usagestr, } -int parse_opt_verbosity_cb(const struct option *opt, const char *arg, +int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used, int unset) { int *target = opt->value; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index a1039a6ce0e..15c8aba9c62 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -90,21 +90,20 @@ struct option { intptr_t defval; }; -#define OPT_END() { OPTION_END } -#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } -#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } -#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } -#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } -#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } -#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } -#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } -#define OPT_LONG(s, l, v, h) { OPTION_LONG, (s), (l), (v), NULL, (h) } -#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } +#define OPT_END() { .type = OPTION_END } +#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } +#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } +#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (b) } +#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_SET_INT(s, l, v, h, i) { .type = OPTION_SET_INT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (i) } +#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } +#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h) } #define OPT_DATE(s, l, v, h) \ - { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ - parse_opt_approxidate_cb } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ - { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } + { .type = 
OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index f18c5212bc9..c6e5dc0dc82 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -162,12 +162,16 @@ static inline int sq_must_quote(char c) return sq_lookup[(unsigned char)c] + quote_path_fully > 0; } -/* returns the longest prefix not needing a quote up to maxlen if positive. - This stops at the first \0 because it's marked as a character needing an - escape */ -static size_t next_quote_pos(const char *s, ssize_t maxlen) +/* + * Returns the longest prefix not needing a quote up to maxlen if + * positive. + * This stops at the first \0 because it's marked as a character + * needing an escape. + */ +static ssize_t next_quote_pos(const char *s, ssize_t maxlen) { - size_t len; + ssize_t len; + if (maxlen < 0) { for (len = 0; !sq_must_quote(s[len]); len++); } else { @@ -192,22 +196,22 @@ static size_t next_quote_pos(const char *s, ssize_t maxlen) static size_t quote_c_style_counted(const char *name, ssize_t maxlen, struct strbuf *sb, FILE *fp, int no_dq) { -#undef EMIT -#define EMIT(c) \ - do { \ - if (sb) strbuf_addch(sb, (c)); \ - if (fp) fputc((c), fp); \ - count++; \ +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ } while (0) -#define EMITBUF(s, l) \ - do { \ - int __ret; \ - if (sb) strbuf_add(sb, (s), (l)); \ - if (fp) __ret = fwrite((s), (l), 1, fp); \ - count += (l); \ + +#define EMITBUF(s, l) \ + do { \ + int __ret; \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) __ret = fwrite((s), (l), 1, fp); \ + count += (l); \ } while (0) - size_t len, count = 0; + ssize_t len, count = 0; const char *p = name; for (;;) { @@ -273,8 +277,8 @@ void write_name_quoted(const char *name, FILE *fp, int terminator) fputc(terminator, fp); } -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *fp, int terminator) +void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, + const char *name, FILE *fp, int terminator) { int needquote = 0; @@ -306,7 +310,7 @@ char *quote_path_relative(const char *in, int len, len = strlen(in); /* "../" prefix itself does not need quoting, but "in" might. 
*/ - needquote = next_quote_pos(in, len) < len; + needquote = (next_quote_pos(in, len) < len); strbuf_setlen(out, 0); strbuf_grow(out, len); diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 5dfad89816d..a5454a1d1c1 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -53,7 +53,7 @@ extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); extern void write_name_quoted(const char *name, FILE *, int terminator); -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, +extern void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, const char *name, FILE *, int terminator); /* quote path as relative to the given prefix */ diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 464e7ca898c..5249d5a1b0c 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -16,7 +16,7 @@ int prefixcmp(const char *str, const char *prefix) */ char strbuf_slopbuf[1]; -void strbuf_init(struct strbuf *sb, size_t hint) +void strbuf_init(struct strbuf *sb, ssize_t hint) { sb->alloc = sb->len = 0; sb->buf = strbuf_slopbuf; @@ -92,7 +92,8 @@ void strbuf_ltrim(struct strbuf *sb) void strbuf_tolower(struct strbuf *sb) { - int i; + unsigned int i; + for (i = 0; i < sb->len; i++) sb->buf[i] = tolower(sb->buf[i]); } @@ -264,7 +265,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) return res; } -ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) +ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; size_t oldalloc = sb->alloc; @@ -293,7 +294,7 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) #define STRBUF_MAXLINK (2*PATH_MAX) -int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) +int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint) { size_t oldalloc = sb->alloc; @@ -301,7 +302,7 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) hint = 32; while (hint < STRBUF_MAXLINK) { - int len; + ssize_t len; strbuf_grow(sb, hint); len = readlink(path, sb->buf, hint); @@ -343,7 +344,7 @@ int strbuf_getline(struct strbuf *sb, FILE *fp, int term) return 0; } -int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) +int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint) { int fd, len; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 9ee908a3ec5..d2aa86c014c 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -50,7 +50,7 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *, size_t); +extern void strbuf_init(struct strbuf *buf, ssize_t hint); extern void strbuf_release(struct strbuf *); extern char *strbuf_detach(struct strbuf *, size_t *); extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); @@ -61,7 +61,7 @@ static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { } /*----- strbuf size related -----*/ -static inline size_t strbuf_avail(const struct strbuf *sb) { +static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? 
sb->alloc - sb->len - 1 : 0; } @@ -122,9 +122,9 @@ extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); -extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); -extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); +extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +extern int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint); +extern int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint); extern int strbuf_getline(struct strbuf *, FILE *, int); diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 6350d65f6d9..4574ac28396 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -7,7 +7,7 @@ * There's no pack memory to release - but stay close to the Git * version so wrap this away: */ -static inline void release_pack_memory(size_t size, int flag) +static inline void release_pack_memory(size_t size __used, int flag __used) { } @@ -59,7 +59,8 @@ void *xmemdupz(const void *data, size_t len) char *xstrndup(const char *str, size_t len) { char *p = memchr(str, '\0', len); - return xmemdupz(str, p ? p - str : len); + + return xmemdupz(str, p ? (size_t)(p - str) : len); } void *xrealloc(void *ptr, size_t size) -- cgit v1.2.3-70-g09d2 From b9ebdcc0ce1c676ebf5dc4f6df6b440d8fcf88ab Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 15:05:09 +0530 Subject: perf stat: Define MATCH_EVENT for easy attr checking MATCH_EVENT is useful: 1. for multiple attrs checking 2. avoid repetition of PERF_TYPE_ and PERF_COUNT_ and save space 3. 
avoids line breakage Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246440909.3403.5.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 095a90e012a..01cc07efb72 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,6 +96,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; +#define MATCH_EVENT(t, c, counter) \ + (attrs[counter].type == PERF_TYPE_##t && \ + attrs[counter].config == PERF_COUNT_##c) + #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -133,13 +137,8 @@ static void create_perf_stat_counter(int counter, int pid) */ static inline int nsec_counter(int counter) { - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || + MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) return 1; return 0; @@ -194,11 +193,9 @@ static void read_counter(int counter) /* * Save the full runtime - to allow normalization during printout: */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) runtime_nsecs[run_idx] = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) runtime_cycles[run_idx] = count[0]; } @@ -292,9 +289,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { if (walltime_nsecs_avg) fprintf(stderr, " # %10.3f CPUs ", (double)count[0] / (double)walltime_nsecs_avg); @@ -307,9 +302,7 @@ static void abs_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - + MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { fprintf(stderr, " # %10.3f IPC ", (double)count[0] / (double)runtime_cycles_avg); } else { -- cgit v1.2.3-70-g09d2 From 73c24cb86c51ff6445b292d9914d31236204393b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 18:36:18 +0530 Subject: perf list: Add cache events After: $ ./perf list List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] L1-d$-loads [Hardware cache event] L1-d$-load-misses [Hardware cache event] L1-d$-stores [Hardware cache event] L1-d$-store-misses [Hardware cache event] L1-d$-prefetches [Hardware cache event] 
L1-d$-prefetch-misses [Hardware cache event] L1-i$-loads [Hardware cache event] L1-i$-load-misses [Hardware cache event] L1-i$-prefetches [Hardware cache event] L1-i$-prefetch-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-stores [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-prefetches [Hardware cache event] LLC-prefetch-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-stores [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-prefetches [Hardware cache event] dTLB-prefetch-misses [Hardware cache event] iTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] branch-loads [Hardware cache event] branch-load-misses [Hardware cache event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1246453578.3072.1.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aed70901df2..5184959e061 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -428,7 +428,7 @@ static const char * const event_type_descriptors[] = { void print_events(void) { struct event_symbol *syms = event_symbols; - unsigned int i, type, prev_type = -1; + unsigned int i, type, op, prev_type = -1; char name[40]; fprintf(stderr, "\n"); @@ -452,6 +452,21 @@ void print_events(void) prev_type = type; } + fprintf(stderr, "\n"); + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + fprintf(stderr, " %-40s [%s]\n", + event_cache_name(type, op, i), + event_type_descriptors[4]); + } + } + } + fprintf(stderr, "\n"); fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); -- cgit v1.2.3-70-g09d2 From 43cbcd8acb4c992cbd22d1ec8a08c0591be5d719 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Jul 2009 12:28:37 -0300 Subject: perf_counter tools: Share rbtree.with the kernel The tools/perf/util/rbtree.c copy already drifted by three csets: 4b324126e0c6c3a5080ca3ec0981e8766ed6f1ee 4c60117811171d867d4f27f17ea07d7419d45dae 16c047add3ceaf0ab882e3e094d1ec904d02312d So remove the copy and use the lib/rbtree.c directly, sharing the source code while still generating a separate object file, since tools/perf uses a far more agressive -O6 switch. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090701152837.GG15682@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 7 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/callchain.h | 2 +- tools/perf/util/include/linux/kernel.h | 21 ++ tools/perf/util/include/linux/module.h | 6 + tools/perf/util/include/linux/rbtree.h | 1 + tools/perf/util/rbtree.c | 383 --------------------------------- tools/perf/util/rbtree.h | 171 --------------- tools/perf/util/strlist.h | 2 +- tools/perf/util/symbol.h | 2 +- 12 files changed, 39 insertions(+), 562 deletions(-) create mode 100644 tools/perf/util/include/linux/kernel.h create mode 100644 tools/perf/util/include/linux/module.h create mode 100644 tools/perf/util/include/linux/rbtree.h delete mode 100644 tools/perf/util/rbtree.c delete mode 100644 tools/perf/util/rbtree.h (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index eddf076b19d..c4f48825a6e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -223,7 +223,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Those must not be GNU-specific; they are shared with perl/ which may # be built by a different compiler. (Note that this is an artifact now # but it still might be nice to keep that distinction.) -BASIC_CFLAGS = +BASIC_CFLAGS = -Iutil/include BASIC_LDFLAGS = # Guard against environment variables @@ -289,10 +289,10 @@ export PERL_PATH LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h +LIB_H += ../../include/linux/rbtree.h LIB_H += perf.h LIB_H += util/types.h LIB_H += util/list.h -LIB_H += util/rbtree.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h @@ -691,6 +691,9 @@ builtin-init-db.o: builtin-init-db.c PERF-CFLAGS util/config.o: util/config.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6cba70daf12..1960c22b816 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -12,7 +12,7 @@ #include "util/color.h" #include "util/list.h" #include "util/cache.h" -#include "util/rbtree.h" +#include #include "util/symbol.h" #include "util/string.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 007363db3b1..0d35e2b8881 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -12,7 +12,7 @@ #include "util/color.h" #include "util/list.h" #include "util/cache.h" -#include "util/rbtree.h" +#include #include "util/symbol.h" #include "util/string.h" #include "util/callchain.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5f5e7df8302..cdc74cfb151 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -23,7 +23,7 @@ #include "util/symbol.h" #include "util/color.h" #include "util/util.h" -#include "util/rbtree.h" +#include #include "util/parse-options.h" #include "util/parse-events.h" diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 251d99ecd22..0606b8fd05a 100644 --- a/tools/perf/util/callchain.h +++ 
b/tools/perf/util/callchain.h @@ -3,7 +3,7 @@ #include "../perf.h" #include "list.h" -#include "rbtree.h" +#include #include "symbol.h" diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h new file mode 100644 index 00000000000..99c1b3d1edd --- /dev/null +++ b/tools/perf/util/include/linux/kernel.h @@ -0,0 +1,21 @@ +#ifndef PERF_LINUX_KERNEL_H_ +#define PERF_LINUX_KERNEL_H_ + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif diff --git a/tools/perf/util/include/linux/module.h b/tools/perf/util/include/linux/module.h new file mode 100644 index 00000000000..b43e2dc21e0 --- /dev/null +++ b/tools/perf/util/include/linux/module.h @@ -0,0 +1,6 @@ +#ifndef PERF_LINUX_MODULE_H +#define PERF_LINUX_MODULE_H + +#define EXPORT_SYMBOL(name) + +#endif diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h new file mode 100644 index 00000000000..7a243a14303 --- /dev/null +++ b/tools/perf/util/include/linux/rbtree.h @@ -0,0 +1 @@ +#include "../../../../include/linux/rbtree.h" diff --git a/tools/perf/util/rbtree.c b/tools/perf/util/rbtree.c deleted file mode 100644 index b15ba9c7cb3..00000000000 --- a/tools/perf/util/rbtree.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - (C) 2002 David Woodhouse - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -*/ - -#include "rbtree.h" - -static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *right = node->rb_right; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_right = right->rb_left)) - rb_set_parent(right->rb_left, node); - right->rb_left = node; - - rb_set_parent(right, parent); - - if (parent) - { - if (node == parent->rb_left) - parent->rb_left = right; - else - parent->rb_right = right; - } - else - root->rb_node = right; - rb_set_parent(node, right); -} - -static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *left = node->rb_left; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_left = left->rb_right)) - rb_set_parent(left->rb_right, node); - left->rb_right = node; - - rb_set_parent(left, parent); - - if (parent) - { - if (node == parent->rb_right) - parent->rb_right = left; - else - parent->rb_left = left; - } - else - root->rb_node = left; - rb_set_parent(node, left); -} - -void rb_insert_color(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *parent, *gparent; - - while ((parent = rb_parent(node)) && rb_is_red(parent)) - { - gparent = rb_parent(parent); - - if (parent == gparent->rb_left) - { - { - register struct rb_node *uncle = gparent->rb_right; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_right == node) - { - register struct rb_node *tmp; - __rb_rotate_left(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_right(gparent, root); - } else { - { - register struct rb_node *uncle = gparent->rb_left; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_left == node) - { - register struct rb_node *tmp; - __rb_rotate_right(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_left(gparent, root); - } - } - - rb_set_black(root->rb_node); -} - -static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, - struct rb_root *root) -{ - struct rb_node *other; - - while ((!node || rb_is_black(node)) && node != root->rb_node) - { - if (parent->rb_left == node) - { - other = parent->rb_right; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_left(parent, root); - other = parent->rb_right; - } - if ((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if (!other->rb_right || rb_is_black(other->rb_right)) - { - rb_set_black(other->rb_left); - rb_set_red(other); - __rb_rotate_right(other, root); - other = parent->rb_right; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_right); - __rb_rotate_left(parent, root); - node = root->rb_node; - break; - } - } - else - { - other = parent->rb_left; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_right(parent, root); - other = parent->rb_left; - } - if 
((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if (!other->rb_left || rb_is_black(other->rb_left)) - { - rb_set_black(other->rb_right); - rb_set_red(other); - __rb_rotate_left(other, root); - other = parent->rb_left; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_left); - __rb_rotate_right(parent, root); - node = root->rb_node; - break; - } - } - } - if (node) - rb_set_black(node); -} - -void rb_erase(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *child, *parent; - int color; - - if (!node->rb_left) - child = node->rb_right; - else if (!node->rb_right) - child = node->rb_left; - else - { - struct rb_node *old = node, *left; - - node = node->rb_right; - while ((left = node->rb_left) != NULL) - node = left; - child = node->rb_right; - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent == old) { - parent->rb_right = child; - parent = node; - } else - parent->rb_left = child; - - node->rb_parent_color = old->rb_parent_color; - node->rb_right = old->rb_right; - node->rb_left = old->rb_left; - - if (rb_parent(old)) - { - if (rb_parent(old)->rb_left == old) - rb_parent(old)->rb_left = node; - else - rb_parent(old)->rb_right = node; - } else - root->rb_node = node; - - rb_set_parent(old->rb_left, node); - if (old->rb_right) - rb_set_parent(old->rb_right, node); - goto color; - } - - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent) - { - if (parent->rb_left == node) - parent->rb_left = child; - else - parent->rb_right = child; - } - else - root->rb_node = child; - - color: - if (color == RB_BLACK) - __rb_erase_color(child, parent, root); -} - -/* - * This function returns the first node (in sort order) of the tree. - */ -struct rb_node *rb_first(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_left) - n = n->rb_left; - return n; -} - -struct rb_node *rb_last(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_right) - n = n->rb_right; - return n; -} - -struct rb_node *rb_next(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a right-hand child, go down and then left as far - as we can. */ - if (node->rb_right) { - node = node->rb_right; - while (node->rb_left) - node=node->rb_left; - return (struct rb_node *)node; - } - - /* No right-hand children. Everything down and left is - smaller than us, so any 'next' node must be in the general - direction of our parent. Go up the tree; any time the - ancestor is a right-hand child of its parent, keep going - up. First time it's a left-hand child of its parent, said - parent is our 'next' node. */ - while ((parent = rb_parent(node)) && node == parent->rb_right) - node = parent; - - return parent; -} - -struct rb_node *rb_prev(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a left-hand child, go down and then right as far - as we can. */ - if (node->rb_left) { - node = node->rb_left; - while (node->rb_right) - node=node->rb_right; - return (struct rb_node *)node; - } - - /* No left-hand children. 
Go up till we find an ancestor which - is a right-hand child of its parent */ - while ((parent = rb_parent(node)) && node == parent->rb_left) - node = parent; - - return parent; -} - -void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root) -{ - struct rb_node *parent = rb_parent(victim); - - /* Set the surrounding nodes to point to the replacement */ - if (parent) { - if (victim == parent->rb_left) - parent->rb_left = new; - else - parent->rb_right = new; - } else { - root->rb_node = new; - } - if (victim->rb_left) - rb_set_parent(victim->rb_left, new); - if (victim->rb_right) - rb_set_parent(victim->rb_right, new); - - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; -} diff --git a/tools/perf/util/rbtree.h b/tools/perf/util/rbtree.h deleted file mode 100644 index 6bdc488a47f..00000000000 --- a/tools/perf/util/rbtree.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/include/linux/rbtree.h - - To use rbtrees you'll have to implement your own insert and search cores. - This will avoid us to use callbacks and to drop drammatically performances. - I know it's not the cleaner way, but in C (not in C++) to get - performances and genericity... - - Some example of insert and search follows here. The search is a plain - normal search over an ordered tree. The insert instead must be implemented - int two steps: as first thing the code must insert the element in - order as a red leaf in the tree, then the support library function - rb_insert_color() must be called. Such function will do the - not trivial work to rebalance the rbtree if necessary. 
- ------------------------------------------------------------------------ -static inline struct page * rb_search_page_cache(struct inode * inode, - unsigned long offset) -{ - struct rb_node * n = inode->i_rb_page_cache.rb_node; - struct page * page; - - while (n) - { - page = rb_entry(n, struct page, rb_page_cache); - - if (offset < page->offset) - n = n->rb_left; - else if (offset > page->offset) - n = n->rb_right; - else - return page; - } - return NULL; -} - -static inline struct page * __rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct rb_node ** p = &inode->i_rb_page_cache.rb_node; - struct rb_node * parent = NULL; - struct page * page; - - while (*p) - { - parent = *p; - page = rb_entry(parent, struct page, rb_page_cache); - - if (offset < page->offset) - p = &(*p)->rb_left; - else if (offset > page->offset) - p = &(*p)->rb_right; - else - return page; - } - - rb_link_node(node, parent, p); - - return NULL; -} - -static inline struct page * rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct page * ret; - if ((ret = __rb_insert_page_cache(inode, offset, node))) - goto out; - rb_insert_color(node, &inode->i_rb_page_cache); - out: - return ret; -} ------------------------------------------------------------------------ -*/ - -#ifndef _LINUX_RBTREE_H -#define _LINUX_RBTREE_H - -#include - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -struct rb_node -{ - unsigned long rb_parent_color; -#define RB_RED 0 -#define RB_BLACK 1 - struct rb_node *rb_right; - struct rb_node *rb_left; -} __attribute__((aligned(sizeof(long)))); - /* The alignment might seem pointless, but allegedly CRIS needs it */ - -struct rb_root -{ - struct rb_node *rb_node; -}; - - -#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -#define rb_color(r) ((r)->rb_parent_color & 1) -#define rb_is_red(r) (!rb_color(r)) -#define rb_is_black(r) rb_color(r) -#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) - -static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -{ - rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -} -static inline void rb_set_color(struct rb_node *rb, int color) -{ - rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -} - -#define RB_ROOT (struct rb_root) { NULL, } -#define rb_entry(ptr, type, member) container_of(ptr, type, member) - -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) - -extern void rb_insert_color(struct rb_node *, struct rb_root *); -extern void rb_erase(struct rb_node *, struct rb_root *); - -/* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(const struct rb_node *); -extern struct rb_node *rb_prev(const struct rb_node *); -extern struct rb_node *rb_first(const struct rb_root *); -extern struct rb_node *rb_last(const struct rb_root *); - -/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void 
rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root); - -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) -{ - node->rb_parent_color = (unsigned long )parent; - node->rb_left = node->rb_right = NULL; - - *rb_link = node; -} - -#endif /* _LINUX_RBTREE_H */ diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 2fb117fb4b6..2fdcfee8758 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -1,7 +1,7 @@ #ifndef STRLIST_H_ #define STRLIST_H_ -#include "rbtree.h" +#include #include struct str_node { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2c48ace8203..3a275db1fa6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -4,7 +4,7 @@ #include #include "types.h" #include "list.h" -#include "rbtree.h" +#include struct symbol { struct rb_node rb_node; -- cgit v1.2.3-70-g09d2 From 5da50258584469ddfee6545feb4eb2252a8d7e7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Jul 2009 14:46:08 -0300 Subject: perf_counter tools: Share list.h with the kernel The copy we were using came from another copy I did for the dwarves (pahole) package, that came from the kernel years ago. The only function that is used by the perf tools and that isn't in the kernel is list_del_range, that I'm leaving in the perf tools only for now. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090701174608.GA5823@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 3 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/util/callchain.h | 2 +- tools/perf/util/include/asm/system.h | 1 + tools/perf/util/include/linux/list.h | 18 + tools/perf/util/include/linux/poison.h | 1 + tools/perf/util/include/linux/prefetch.h | 6 + tools/perf/util/list.h | 603 ------------------------------- tools/perf/util/symbol.h | 2 +- 10 files changed, 32 insertions(+), 608 deletions(-) create mode 100644 tools/perf/util/include/asm/system.h create mode 100644 tools/perf/util/include/linux/list.h create mode 100644 tools/perf/util/include/linux/poison.h create mode 100644 tools/perf/util/include/linux/prefetch.h delete mode 100644 tools/perf/util/list.h (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c4f48825a6e..8f7fd1b050a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,9 +290,10 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += ../../include/linux/rbtree.h +LIB_H += ../../include/linux/list.h +LIB_H += util/include/linux/list.h LIB_H += perf.h LIB_H += util/types.h -LIB_H += util/list.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1960c22b816..132de8b28db 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -10,7 +10,7 @@ #include "util/util.h" #include "util/color.h" -#include "util/list.h" +#include #include "util/cache.h" #include #include "util/symbol.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d35e2b8881..be1b7584796 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -10,7 +10,7 @@ #include "util/util.h" #include "util/color.h" -#include "util/list.h" +#include #include "util/cache.h" #include #include "util/symbol.h" diff --git 
a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 0606b8fd05a..e9bd5e882f3 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -2,7 +2,7 @@ #define __PERF_CALLCHAIN_H #include "../perf.h" -#include "list.h" +#include #include #include "symbol.h" diff --git a/tools/perf/util/include/asm/system.h b/tools/perf/util/include/asm/system.h new file mode 100644 index 00000000000..710cecca972 --- /dev/null +++ b/tools/perf/util/include/asm/system.h @@ -0,0 +1 @@ +/* Empty */ diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h new file mode 100644 index 00000000000..dbe4b814382 --- /dev/null +++ b/tools/perf/util/include/linux/list.h @@ -0,0 +1,18 @@ +#include "../../../../include/linux/list.h" + +#ifndef PERF_LIST_H +#define PERF_LIST_H +/** + * list_del_range - deletes range of entries from list. + * @begin: first element in the range to delete from the list. + * @end: last element in the range to delete from the list. + * Note: list_empty on the range of entries does not return true after this, + * the entries is in an undefined state. + */ +static inline void list_del_range(struct list_head *begin, + struct list_head *end) +{ + begin->prev->next = end->next; + end->next->prev = begin->prev; +} +#endif diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h new file mode 100644 index 00000000000..fef6dbc9ce1 --- /dev/null +++ b/tools/perf/util/include/linux/poison.h @@ -0,0 +1 @@ +#include "../../../../include/linux/poison.h" diff --git a/tools/perf/util/include/linux/prefetch.h b/tools/perf/util/include/linux/prefetch.h new file mode 100644 index 00000000000..7841e485d8c --- /dev/null +++ b/tools/perf/util/include/linux/prefetch.h @@ -0,0 +1,6 @@ +#ifndef PERF_LINUX_PREFETCH_H +#define PERF_LINUX_PREFETCH_H + +static inline void prefetch(void *a __attribute__((unused))) { } + +#endif diff --git a/tools/perf/util/list.h b/tools/perf/util/list.h deleted file mode 100644 index e2548e8072c..00000000000 --- a/tools/perf/util/list.h +++ /dev/null @@ -1,603 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H -/* - Copyright (C) Cast of dozens, comes from the Linux kernel - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. -*/ - -#include - -/* - * These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. - */ -#define LIST_POISON1 ((void *)0x00100100) -#define LIST_POISON2 ((void *)0x00200200) - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. 
- */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} - -/** - * list_del_range - deletes range of entries from list. - * @beging: first element in the range to delete from the list. - * @beging: first element in the range to delete from the list. - * Note: list_empty on the range of entries does not return true after this, - * the entries is in an undefined state. - */ -static inline void list_del_range(struct list_head *begin, - struct list_head *end) -{ - begin->prev->next = end->next; - end->next->prev = begin->prev; -} - -/** - * list_replace - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * Note: if 'old' was empty, it will be overwritten. - */ -static inline void list_replace(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; -} - -static inline void list_replace_init(struct list_head *old, - struct list_head *new) -{ - list_replace(old, new); - INIT_LIST_HEAD(old); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. 
- */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_is_last - tests whether @list is the last entry in list @head - * @list: the entry to test - * @head: the head of the list - */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) -{ - return list->next == head; -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is empty and not being modified - * @head: the list to test - * - * Description: - * tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_first_entry - get the first element from a list - * @ptr: the list head to take the element from. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - * - * Note, that list is expected to be not empty. 
- */ -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; pos != (head); \ - pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - * - * Prepares a pos entry for use as a start point in list_for_each_entry_continue. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - continue iteration over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Continue to iterate over list of given type, continuing after - * the current position. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_from - iterate over list of given type from the current point - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- * - * Iterate over list of given type, continuing from current position. - */ -#define list_for_each_entry_from(pos, head, member) \ - for (; &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_continue - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing after current point, - * safe against removal of list entry. - */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_from - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. - */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_reverse - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate backwards over list of given type, safe against removal - * of list entry. - */ -#define list_for_each_entry_safe_reverse(pos, n, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member), \ - n = list_entry(pos->member.prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.prev, typeof(*n), member)) - -/* - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is - * too wasteful. - * You lose the ability to access the tail in O(1). 
- */ - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -#define HLIST_HEAD_INIT { .first = NULL } -#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } -#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) -static inline void INIT_HLIST_NODE(struct hlist_node *h) -{ - h->next = NULL; - h->pprev = NULL; -} - -static inline int hlist_unhashed(const struct hlist_node *h) -{ - return !h->pprev; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); - n->next = LIST_POISON1; - n->pprev = LIST_POISON2; -} - -static inline void hlist_del_init(struct hlist_node *n) -{ - if (!hlist_unhashed(n)) { - __hlist_del(n); - INIT_HLIST_NODE(n); - } -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos; \ - pos = pos->next) - -#define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ - pos = n) - -/** - * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. 
- * @n: another &struct hlist_node to use as temporary storage - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#endif diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3a275db1fa6..65a8449b91f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -3,7 +3,7 @@ #include #include "types.h" -#include "list.h" +#include #include struct symbol { -- cgit v1.2.3-70-g09d2 From a92bef0f216bbf3d05c0c0709ea02e267f2b920e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 21:02:10 +0200 Subject: perf stat: Handle pipe read failures in perf stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building builtin-stat.c reports the following errors: cc1: warnings being treated as errors builtin-stat.c: In function ‘run_perf_stat’: builtin-stat.c:242: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result builtin-stat.c:255: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result make: *** [builtin-stat.o] Erreur 1 This patch handles the possible pipe read failures. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246474930-6088-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 01cc07efb72..27921a8ce1a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -239,7 +239,8 @@ static int run_perf_stat(int argc __used, const char **argv) /* * Wait until the parent tells us to go. */ - read(go_pipe[0], &buf, 1); + if (read(go_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); execvp(argv[0], (char **)argv); @@ -252,7 +253,8 @@ static int run_perf_stat(int argc __used, const char **argv) */ close(child_ready_pipe[1]); close(go_pipe[0]); - read(child_ready_pipe[0], &buf, 1); + if (read(child_ready_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); close(child_ready_pipe[0]); for (counter = 0; counter < nr_counters; counter++) -- cgit v1.2.3-70-g09d2 From 9974f496782b7612e36a143bedda858f1cb953d4 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:05:58 +0200 Subject: perf_counter tools: Make symbol loading consistently return number of loaded symbols perf_counter tools: Make symbol loading consistently return number of loaded symbols. 
Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514758.13293.42.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/symbol.c | 9 ++++++--- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 132de8b28db..3becc8a35be 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -172,7 +172,7 @@ static int load_kernel(void) return -1; err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { + if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; } else diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index be1b7584796..58d1612894f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -189,7 +189,7 @@ static int load_kernel(void) return -1; err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { + if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; } else diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cdc74cfb151..97fde1d8436 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -364,7 +364,7 @@ static int parse_symbols(void) if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0) + if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 78c2efde01b..c077b6a1469 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -146,6 +146,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb char *line = NULL; size_t n; FILE *file = fopen("/proc/kallsyms", "r"); + int count = 0; if (file == NULL) goto out_failure; @@ -188,8 +189,10 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb if (filter && filter(self, sym)) symbol__delete(sym, self->sym_priv_size); - else + else { dso__insert_symbol(self, sym); + count++; + } } /* @@ -212,7 +215,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb free(line); fclose(file); - return 0; + return count; out_delete_line: free(line); @@ -639,7 +642,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err < 0) + if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); return err; -- cgit v1.2.3-70-g09d2 From 208b4b4a59351011b7f212e273f2b7bc47a9c482 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:07:10 +0200 Subject: perf_counter tools: Add infrastructure to support loading of kernel module symbols Add infrastructure for module path discovery and section load addresses. 
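For illustration, the section load addresses come from sysfs: each
/sys/module/<name>/sections/<section> file holds the hex base address of
that section. Below is a minimal standalone sketch (not part of the patch)
of reading one such address; the module and section names in main() are
just example values, and the section loader added below walks the whole
sections directory the same way:

    #include <stdio.h>
    #include <stdlib.h>
    #include <limits.h>

    /* Illustrative sketch only: read the load address of one module
     * section from sysfs. */
    static unsigned long long section_vma(const char *module,
                                          const char *section)
    {
            char path[PATH_MAX];
            char line[64];
            unsigned long long vma = 0;
            FILE *file;

            snprintf(path, sizeof(path), "/sys/module/%s/sections/%s",
                     module, section);
            file = fopen(path, "r");
            if (file == NULL)
                    return 0;
            /* the file contains a single line such as "0xffffffffa0010000" */
            if (fgets(line, sizeof(line), file))
                    vma = strtoull(line, NULL, 16);
            fclose(file);
            return vma;
    }

    int main(void)
    {
            /* "ext4" and ".text" are only example names */
            printf("%#llx\n", section_vma("ext4", ".text"));
            return 0;
    }
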
Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514830.13293.44.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 + tools/perf/util/module.c | 509 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/module.h | 53 +++++ 3 files changed, 564 insertions(+) create mode 100644 tools/perf/util/module.c create mode 100644 tools/perf/util/module.h (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8f7fd1b050a..7822b3d6bac 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -306,6 +306,7 @@ LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h +LIB_H += util/module.h LIB_H += util/color.h LIB_OBJS += util/abspath.o @@ -329,6 +330,7 @@ LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o +LIB_OBJS += util/module.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c new file mode 100644 index 00000000000..ddabe925d65 --- /dev/null +++ b/tools/perf/util/module.c @@ -0,0 +1,509 @@ +#include "util.h" +#include "../perf.h" +#include "string.h" +#include "module.h" + +#include +#include +#include +#include +#include + +static unsigned int crc32(const char *p, unsigned int len) +{ + int i; + unsigned int crc = 0; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + return crc; +} + +/* module section methods */ + +struct sec_dso *sec_dso__new_dso(const char *name) +{ + struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1); + + if (self != NULL) { + strcpy(self->name, name); + self->secs = RB_ROOT; + self->find_section = sec_dso__find_section; + } + + return self; +} + +static void sec_dso__delete_section(struct section *self) +{ + free(((void *)self)); +} + +void sec_dso__delete_sections(struct sec_dso *self) +{ + struct section *pos; + struct rb_node *next = rb_first(&self->secs); + + while (next) { + pos = rb_entry(next, struct section, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &self->secs); + sec_dso__delete_section(pos); + } +} + +void sec_dso__delete_self(struct sec_dso *self) +{ + sec_dso__delete_sections(self); + free(self); +} + +static void sec_dso__insert_section(struct sec_dso *self, struct section *sec) +{ + struct rb_node **p = &self->secs.rb_node; + struct rb_node *parent = NULL; + const u64 hash = sec->hash; + struct section *s; + + while (*p != NULL) { + parent = *p; + s = rb_entry(parent, struct section, rb_node); + if (hash < s->hash) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&sec->rb_node, parent, p); + rb_insert_color(&sec->rb_node, &self->secs); +} + +struct section *sec_dso__find_section(struct sec_dso *self, const char *name) +{ + struct rb_node *n; + u64 hash; + int len; + + if (self == NULL) + return NULL; + + len = strlen(name); + hash = crc32(name, len); + + n = self->secs.rb_node; + + while (n) { + struct section *s = rb_entry(n, struct section, rb_node); + + if (hash < s->hash) + n = n->rb_left; + else if (hash > s->hash) + n = n->rb_right; + else { + if (!strcmp(name, s->name)) + return s; + else + n = rb_next(&s->rb_node); + } + } + + return NULL; +} + +static size_t sec_dso__fprintf_section(struct section *self, FILE *fp) +{ + return fprintf(fp, "name:%s vma:%llx path:%s\n", + self->name, self->vma, self->path); 
+} + +size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp) +{ + size_t ret = fprintf(fp, "dso: %s\n", self->name); + + struct rb_node *nd; + for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) { + struct section *pos = rb_entry(nd, struct section, rb_node); + ret += sec_dso__fprintf_section(pos, fp); + } + + return ret; +} + +static struct section *section__new(const char *name, const char *path) +{ + struct section *self = calloc(1, sizeof(*self)); + + if (!self) + goto out_failure; + + self->name = calloc(1, strlen(name) + 1); + if (!self->name) + goto out_failure; + + self->path = calloc(1, strlen(path) + 1); + if (!self->path) + goto out_failure; + + strcpy(self->name, name); + strcpy(self->path, path); + self->hash = crc32(self->name, strlen(name)); + + return self; + +out_failure: + if (self) { + if (self->name) + free(self->name); + if (self->path) + free(self->path); + free(self); + } + + return NULL; +} + +/* module methods */ + +struct mod_dso *mod_dso__new_dso(const char *name) +{ + struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1); + + if (self != NULL) { + strcpy(self->name, name); + self->mods = RB_ROOT; + self->find_module = mod_dso__find_module; + } + + return self; +} + +static void mod_dso__delete_module(struct module *self) +{ + free(((void *)self)); +} + +void mod_dso__delete_modules(struct mod_dso *self) +{ + struct module *pos; + struct rb_node *next = rb_first(&self->mods); + + while (next) { + pos = rb_entry(next, struct module, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &self->mods); + mod_dso__delete_module(pos); + } +} + +void mod_dso__delete_self(struct mod_dso *self) +{ + mod_dso__delete_modules(self); + free(self); +} + +static void mod_dso__insert_module(struct mod_dso *self, struct module *mod) +{ + struct rb_node **p = &self->mods.rb_node; + struct rb_node *parent = NULL; + const u64 hash = mod->hash; + struct module *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct module, rb_node); + if (hash < m->hash) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&mod->rb_node, parent, p); + rb_insert_color(&mod->rb_node, &self->mods); +} + +struct module *mod_dso__find_module(struct mod_dso *self, const char *name) +{ + struct rb_node *n; + u64 hash; + int len; + + if (self == NULL) + return NULL; + + len = strlen(name); + hash = crc32(name, len); + + n = self->mods.rb_node; + + while (n) { + struct module *m = rb_entry(n, struct module, rb_node); + + if (hash < m->hash) + n = n->rb_left; + else if (hash > m->hash) + n = n->rb_right; + else { + if (!strcmp(name, m->name)) + return m; + else + n = rb_next(&m->rb_node); + } + } + + return NULL; +} + +static size_t mod_dso__fprintf_module(struct module *self, FILE *fp) +{ + return fprintf(fp, "name:%s path:%s\n", self->name, self->path); +} + +size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp) +{ + struct rb_node *nd; + size_t ret; + + ret = fprintf(fp, "dso: %s\n", self->name); + + for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) { + struct module *pos = rb_entry(nd, struct module, rb_node); + + ret += mod_dso__fprintf_module(pos, fp); + } + + return ret; +} + +static struct module *module__new(const char *name, const char *path) +{ + struct module *self = calloc(1, sizeof(*self)); + + if (!self) + goto out_failure; + + self->name = calloc(1, strlen(name) + 1); + if (!self->name) + goto out_failure; + + self->path = calloc(1, strlen(path) + 1); + if (!self->path) + goto out_failure; + + strcpy(self->name, name); + 
strcpy(self->path, path); + self->hash = crc32(self->name, strlen(name)); + + return self; + +out_failure: + if (self) { + if (self->name) + free(self->name); + if (self->path) + free(self->path); + free(self); + } + + return NULL; +} + +static int mod_dso__load_sections(struct module *mod) +{ + int count = 0, path_len; + struct dirent *entry; + char *line = NULL; + char *dir_path; + DIR *dir; + size_t n; + + path_len = strlen("/sys/module/"); + path_len += strlen(mod->name); + path_len += strlen("/sections/"); + + dir_path = calloc(1, path_len + 1); + if (dir_path == NULL) + goto out_failure; + + strcat(dir_path, "/sys/module/"); + strcat(dir_path, mod->name); + strcat(dir_path, "/sections/"); + + dir = opendir(dir_path); + if (dir == NULL) + goto out_free; + + while ((entry = readdir(dir))) { + struct section *section; + char *path, *vma; + int line_len; + FILE *file; + + if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name)) + continue; + + path = calloc(1, path_len + strlen(entry->d_name) + 1); + if (path == NULL) + break; + strcat(path, dir_path); + strcat(path, entry->d_name); + + file = fopen(path, "r"); + if (file == NULL) { + free(path); + break; + } + + line_len = getline(&line, &n, file); + if (line_len < 0) { + free(path); + fclose(file); + break; + } + + if (!line) { + free(path); + fclose(file); + break; + } + + line[--line_len] = '\0'; /* \n */ + + vma = strstr(line, "0x"); + if (!vma) { + free(path); + fclose(file); + break; + } + vma += 2; + + section = section__new(entry->d_name, path); + if (!section) { + fprintf(stderr, "load_sections: allocation error\n"); + free(path); + fclose(file); + break; + } + + hex2u64(vma, §ion->vma); + sec_dso__insert_section(mod->sections, section); + + free(path); + fclose(file); + count++; + } + + closedir(dir); + free(line); + free(dir_path); + + return count; + +out_free: + free(dir_path); + +out_failure: + return count; +} + +static int mod_dso__load_module_paths(struct mod_dso *self) +{ + struct utsname uts; + int count = 0, len; + char *line = NULL; + FILE *file; + char *path; + size_t n; + + if (uname(&uts) < 0) + goto out_failure; + + len = strlen("/lib/modules/"); + len += strlen(uts.release); + len += strlen("/modules.dep"); + + path = calloc(1, len); + if (path == NULL) + goto out_failure; + + strcat(path, "/lib/modules/"); + strcat(path, uts.release); + strcat(path, "/modules.dep"); + + file = fopen(path, "r"); + free(path); + if (file == NULL) + goto out_failure; + + while (!feof(file)) { + char *path, *name, *tmp; + struct module *module; + int line_len, len; + + line_len = getline(&line, &n, file); + if (line_len < 0) + break; + + if (!line) + goto out_failure; + + line[--line_len] = '\0'; /* \n */ + + path = strtok(line, ":"); + if (!path) + goto out_failure; + + name = strdup(path); + name = strtok(name, "/"); + + tmp = name; + + while (tmp) { + tmp = strtok(NULL, "/"); + if (tmp) + name = tmp; + } + name = strsep(&name, "."); + + /* Quirk: replace '-' with '_' in sound modules */ + for (len = strlen(name); len; len--) { + if (*(name+len) == '-') + *(name+len) = '_'; + } + + module = module__new(name, path); + if (!module) { + fprintf(stderr, "load_module_paths: allocation error\n"); + goto out_failure; + } + mod_dso__insert_module(self, module); + + module->sections = sec_dso__new_dso("sections"); + if (!module->sections) { + fprintf(stderr, "load_module_paths: allocation error\n"); + goto out_failure; + } + + module->active = mod_dso__load_sections(module); + + if (module->active > 0) + count++; + } + + 
free(line); + fclose(file); + + return count; + +out_failure: + return -1; +} + +int mod_dso__load_modules(struct mod_dso *dso) +{ + int err; + + err = mod_dso__load_module_paths(dso); + + return err; +} diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h new file mode 100644 index 00000000000..8a592ef641c --- /dev/null +++ b/tools/perf/util/module.h @@ -0,0 +1,53 @@ +#ifndef _PERF_MODULE_ +#define _PERF_MODULE_ 1 + +#include +#include "../types.h" +#include +#include + +struct section { + struct rb_node rb_node; + u64 hash; + u64 vma; + char *name; + char *path; +}; + +struct sec_dso { + struct list_head node; + struct rb_root secs; + struct section *(*find_section)(struct sec_dso *, const char *name); + char name[0]; +}; + +struct module { + struct rb_node rb_node; + u64 hash; + char *name; + char *path; + struct sec_dso *sections; + int active; +}; + +struct mod_dso { + struct list_head node; + struct rb_root mods; + struct module *(*find_module)(struct mod_dso *, const char *name); + char name[0]; +}; + +struct sec_dso *sec_dso__new_dso(const char *name); +void sec_dso__delete_sections(struct sec_dso *self); +void sec_dso__delete_self(struct sec_dso *self); +size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp); +struct section *sec_dso__find_section(struct sec_dso *self, const char *name); + +struct mod_dso *mod_dso__new_dso(const char *name); +void mod_dso__delete_modules(struct mod_dso *self); +void mod_dso__delete_self(struct mod_dso *self); +size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp); +struct module *mod_dso__find_module(struct mod_dso *self, const char *name); +int mod_dso__load_modules(struct mod_dso *dso); + +#endif /* _PERF_MODULE_ */ -- cgit v1.2.3-70-g09d2 From 6cfcc53ed4f3ecb9319e73a03f34f1eddcb644dd Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:08:36 +0200 Subject: perf_counter tools: Connect module support infrastructure to symbol loading infrastructure Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514916.13293.46.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/symbol.c | 159 +++++++++++++++++++++++++++++++++++++++--- tools/perf/util/symbol.h | 5 +- 5 files changed, 156 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3becc8a35be..88205686eb6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -171,7 +171,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 58d1612894f..38d136fedfb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -188,7 +188,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 97fde1d8436..9bb25fc3d4c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -364,7 +364,7 @@ static int parse_symbols(void) if (kernel_dso == 
NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) <= 0) + if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1, 0) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c077b6a1469..98a13114de7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -35,7 +35,7 @@ static struct symbol *symbol__new(u64 start, u64 len, self = ((void *)self) + priv_size; } self->start = start; - self->end = start + len - 1; + self->end = len ? start + len - 1 : start; memcpy(self->name, name, namelen); return self; @@ -48,8 +48,12 @@ static void symbol__delete(struct symbol *self, unsigned int priv_size) static size_t symbol__fprintf(struct symbol *self, FILE *fp) { - return fprintf(fp, " %llx-%llx %s\n", + if (!self->module) + return fprintf(fp, " %llx-%llx %s\n", self->start, self->end, self->name); + else + return fprintf(fp, " %llx-%llx %s \t[%s]\n", + self->start, self->end, self->name, self->module->name); } struct dso *dso__new(const char *name, unsigned int sym_priv_size) @@ -310,6 +314,26 @@ static inline int elf_sym__is_function(const GElf_Sym *sym) sym->st_size != 0; } +static inline int elf_sym__is_label(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_NOTYPE && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF && + sym->st_shndx != SHN_ABS; +} + +static inline const char *elf_sec__name(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return secstrs->d_buf + shdr->sh_name; +} + +static inline int elf_sec__is_text(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; +} + static inline const char *elf_sym__name(const GElf_Sym *sym, const Elf_Data *symstrs) { @@ -451,9 +475,9 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, } static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int verbose, struct module *mod) { - Elf_Data *symstrs; + Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; uint32_t index; @@ -461,7 +485,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Shdr shdr; Elf_Data *syms; GElf_Sym sym; - Elf_Scn *sec, *sec_dynsym; + Elf_Scn *sec, *sec_dynsym, *sec_strndx; Elf *elf; size_t dynsym_idx; int nr = 0; @@ -520,6 +544,14 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, if (symstrs == NULL) goto out_elf_end; + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs = elf_getdata(sec_strndx, NULL); + if (symstrs == NULL) + goto out_elf_end; + nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); @@ -529,8 +561,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; + struct section *section = NULL; + int is_label = elf_sym__is_label(&sym); + const char *section_name; - if (!elf_sym__is_function(&sym)) + if (!is_label && !elf_sym__is_function(&sym)) continue; sec = elf_getscn(elf, sym.st_shndx); @@ -538,6 +573,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, goto out_elf_end; gelf_getshdr(sec, &shdr); + + if (is_label && !elf_sec__is_text(&shdr, secstrs)) + continue; + + section_name = elf_sec__name(&shdr, secstrs); obj_start = sym.st_value; if (self->prelinked) { @@ -548,6 +588,17 @@ static int dso__load_sym(struct dso *self, int 
fd, const char *name, sym.st_value -= shdr.sh_addr - shdr.sh_offset; } + if (mod) { + section = mod->sections->find_section(mod->sections, section_name); + if (section) + sym.st_value += section->vma; + else { + fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n", + mod->name, section_name); + goto out_elf_end; + } + } + f = symbol__new(sym.st_value, sym.st_size, elf_sym__name(&sym, symstrs), self->sym_priv_size, obj_start, verbose); @@ -557,6 +608,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, if (filter && filter(self, f)) symbol__delete(f, self->sym_priv_size); else { + f->module = mod; dso__insert_symbol(self, f); nr++; } @@ -606,7 +658,7 @@ more: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, fd, name, filter, verbose); + ret = dso__load_sym(self, fd, name, filter, verbose, NULL); close(fd); /* @@ -620,6 +672,86 @@ out: return ret; } +static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, + symbol_filter_t filter, int verbose) +{ + struct module *mod = mod_dso__find_module(mods, name); + int err = 0, fd; + + if (mod == NULL || !mod->active) + return err; + + fd = open(mod->path, O_RDONLY); + + if (fd < 0) + return err; + + err = dso__load_sym(self, fd, name, filter, verbose, mod); + close(fd); + + return err; +} + +int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose) +{ + struct mod_dso *mods = mod_dso__new_dso("modules"); + struct module *pos; + struct rb_node *next; + int err; + + err = mod_dso__load_modules(mods); + + if (err <= 0) + return err; + + /* + * Iterate over modules, and load active symbols. + */ + next = rb_first(&mods->mods); + while (next) { + pos = rb_entry(next, struct module, rb_node); + err = dso__load_module(self, mods, pos->name, filter, verbose); + + if (err < 0) + break; + + next = rb_next(&pos->rb_node); + } + + if (err < 0) { + mod_dso__delete_modules(mods); + mod_dso__delete_self(mods); + } + + return err; +} + +static inline void dso__fill_symbol_holes(struct dso *self) +{ + struct symbol *prev = NULL; + struct rb_node *nd; + + for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) { + struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + + if (prev) { + u64 hole = 0; + int alias = pos->start == prev->start; + + if (!alias) + hole = prev->start - pos->end - 1; + + if (hole || alias) { + if (alias) + pos->end = prev->end; + else if (hole) + pos->end = prev->start - 1; + } + } + prev = pos; + } +} + static int dso__load_vmlinux(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose) { @@ -628,19 +760,26 @@ static int dso__load_vmlinux(struct dso *self, const char *vmlinux, if (fd < 0) return -1; - err = dso__load_sym(self, fd, vmlinux, filter, verbose); + err = dso__load_sym(self, fd, vmlinux, filter, verbose, NULL); + + if (err > 0) + dso__fill_symbol_holes(self); + close(fd); return err; } int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int verbose, int modules) { int err = -1; - if (vmlinux) + if (vmlinux) { err = dso__load_vmlinux(self, vmlinux, filter, verbose); + if (err > 0 && modules) + err = dso__load_modules(self, filter, verbose); + } if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 65a8449b91f..4e141a30911 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,6 +5,7 @@ #include "types.h" #include #include +#include 
"module.h" struct symbol { struct rb_node rb_node; @@ -13,6 +14,7 @@ struct symbol { u64 obj_start; u64 hist_sum; u64 *hist; + struct module *module; void *priv; char name[0]; }; @@ -41,7 +43,8 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose); + symbol_filter_t filter, int verbose, int modules); +int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); size_t dso__fprintf(struct dso *self, FILE *fp); -- cgit v1.2.3-70-g09d2 From 429764873cf3fc3e73142872a674bb27cda589c1 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:09:46 +0200 Subject: perf_counter tools: Enable kernel module symbol loading in tools Add the -m/--modules option to perf report and perf annotate, which enables live module symbol/image loading. To be used with -k/--vmlinux. (Also give perf annotate a -P/--full-paths option.) Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514986.13293.48.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 25 ++++++++++++++++++++----- tools/perf/builtin-report.c | 9 ++++++++- tools/perf/builtin-top.c | 12 ++++++++++-- 3 files changed, 38 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 88205686eb6..08ea6c5329d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -43,6 +43,10 @@ static int dump_trace = 0; static int verbose; +static int modules; + +static int full_paths; + static int print_line; static unsigned long page_size; @@ -171,7 +175,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; @@ -1268,19 +1272,25 @@ static void print_summary(char *filename) static void annotate_sym(struct dso *dso, struct symbol *sym) { - char *filename = dso->name; + char *filename = dso->name, *d_filename; u64 start, end, len; char command[PATH_MAX*2]; FILE *file; if (!filename) return; - if (dso == kernel_dso) + if (sym->module) + filename = sym->module->path; + else if (dso == kernel_dso) filename = vmlinux; start = sym->obj_start; if (!start) start = sym->start; + if (full_paths) + d_filename = filename; + else + d_filename = basename(filename); end = start + sym->end - sym->start + 1; len = sym->end - sym->start; @@ -1291,13 +1301,14 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } printf("\n\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); + printf(" Percent | Source code & Disassembly of %s\n", d_filename); printf("------------------------------------------------\n"); if (verbose >= 2) printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename); + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", + (u64)start, (u64)end, filename, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -1472,8 +1483,12 @@ static const 
struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &modules, + "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), + OPT_BOOLEAN('P', "full-paths", &full_paths, + "Don't shorten the displayed pathnames"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 38d136fedfb..b44476ca239 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -46,6 +46,8 @@ static int dump_trace = 0; static int verbose; #define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) +static int modules; + static int full_paths; static unsigned long page_size; @@ -188,7 +190,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; @@ -648,6 +650,9 @@ sort__sym_print(FILE *fp, struct hist_entry *self) ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); + + if (self->sym->module) + ret += fprintf(fp, "\t[%s]", self->sym->module->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); } @@ -1710,6 +1715,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &modules, + "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 9bb25fc3d4c..aa044ea1482 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -66,6 +66,7 @@ static unsigned int page_size; static unsigned int mmap_pages = 16; static int freq = 0; static int verbose = 0; +static char *vmlinux = NULL; static char *sym_filter; static unsigned long filter_start; @@ -265,7 +266,10 @@ static void print_sym_table(void) printf("%9.1f %10ld - ", syme->weight, syme->snap_count); color_fprintf(stdout, color, "%4.1f%%", pcnt); - printf(" - %016llx : %s\n", sym->start, sym->name); + printf(" - %016llx : %s", sym->start, sym->name); + if (sym->module) + printf("\t[%s]", sym->module->name); + printf("\n"); } } @@ -359,12 +363,13 @@ static int parse_symbols(void) { struct rb_node *node; struct symbol *sym; + int modules = vmlinux ? 
1 : 0; kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1, 0) <= 0) + if (dso__load_kernel(kernel_dso, vmlinux, symbol_filter, verbose, modules) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); @@ -680,6 +685,7 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), + OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, @@ -709,6 +715,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; + symbol__init(); + page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); -- cgit v1.2.3-70-g09d2 From 14f4654cbd531d48651e005cf05907c14bddb193 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:19 +0200 Subject: perf_counter tools: Create new chain_for_each_child() iterator Iterating through children of a node in the callchain tree shows something that may be quite confusing at a first glance. The head is the children field of the parent and the list nodes are in the brothers field of the children. This is because the childs are linked to the parent as a list of "brothers" using the "children" list of the parent as a head: --------------- | Parent (head) |------------------------------------- --------------- | | | children | | | ----------- ----------- | | 1st child |---brother---| 2nd child |---brother----- ----------- ----------- This makes the following strange pattern often occuring: list_for_each_entry(child, &parent->children, brothers) { // do something with children } Abstract it to chain_for_each_child() to factorize and simplify this pattern. 
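With the iterator in place, a recursive walk over the callchain tree reads
naturally. A small sketch of such a walk (not from the patch; it assumes
<stdio.h>, the node type from util/callchain.h and the macro are in scope,
and the printf is only a stand-in for whatever the visit step does):

    /* Illustrative sketch: depth-first walk using the new iterator. */
    static void walk_chain(struct callchain_node *node, int depth)
    {
            struct callchain_node *child;

            printf("%*shit=%llu\n", depth, "",
                   (unsigned long long)node->hit);

            chain_for_each_child(child, node)
                    walk_chain(child, depth + 1);
    }
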
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246550301-8954-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3dceabd9b5e..3c4a91fea62 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -16,6 +16,9 @@ #include "callchain.h" +#define chain_for_each_child(child, parent) \ + list_for_each_entry(child, &parent->children, brothers) + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) @@ -46,7 +49,7 @@ void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) { struct callchain_node *child; - list_for_each_entry(child, &node->children, brothers) + chain_for_each_child(child, node) sort_chain_to_rbtree(rb_root, child); if (node->hit) @@ -77,7 +80,7 @@ create_child(struct callchain_node *parent, bool inherit_children) list_splice(&parent->children, &new->children); INIT_LIST_HEAD(&parent->children); - list_for_each_entry(next, &new->children, brothers) + chain_for_each_child(next, new) next->parent = new; } list_add_tail(&new->brothers, &parent->children); @@ -173,7 +176,7 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, struct callchain_node *rnode; /* lookup in childrens */ - list_for_each_entry(rnode, &root->children, brothers) { + chain_for_each_child(rnode, root) { unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) -- cgit v1.2.3-70-g09d2 From 5a4b181721375700124513cdd9f97056e1c66675 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:20 +0200 Subject: perf_counter tools: Add new OPT_CALLBACK_DEFAULT option There is no predefined macro to create an option that can have a custom value or a default one if none is given. This patch provides a new helper OPT_CALLBACK_DEFAULT() which defines such kind of option. For example, considering an option -c, we want to get the default value in the following cases: perf command -c -d perf command -d -c And the foo value when it's given: perf command -c foo -d perf command -d -c foo That's also why PARSE_OPT_LASTARG_DEFAULT is extended here to support default values whatever the position of the option, not only in the end. Should it now be renamed to PARSE_OPT_ARG_DEFAULT ? 
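The first user appears later in this series: perf report's -c/--callchain
option is declared with a callback and a "flat" default, so both
"perf report -c" and "perf report -c graph" parse as intended. The
declaration looks like this (taken from the builtin-report.c change
further down):

    OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type",
                         "Display callchains with output_type: flat, graph. "
                         "Default to flat", &parse_callchain_opt, "flat"),
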
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo Cc: git@vger.kernel.org LKML-Reference: <1246550301-8954-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-options.c | 3 ++- tools/perf/util/parse-options.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 9a897b7cce7..1bf67190c82 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -20,7 +20,8 @@ static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, if (p->opt) { *arg = p->opt; p->opt = NULL; - } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { + } else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 || + **(p->argv + 1) == '-')) { *arg = (const char *)opt->defval; } else if (p->argc > 1) { p->argc--; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 15c8aba9c62..8aa3464c709 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -104,6 +104,8 @@ struct option { { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } +#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. -- cgit v1.2.3-70-g09d2 From 4eb3e4788b8a5e220a0aeb590f88c28850726ebe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:21 +0200 Subject: perf report: Add support for callchain graph output Currently, the printing of callchains is done in a single vertical level, this is the "flat" mode: 8.25% [k] copy_user_generic_string 4.19% copy_user_generic_string generic_file_aio_read do_sync_read vfs_read sys_pread64 system_call_fastpath pread64 This patch introduces a new "graph" mode which provides a hierarchical output of factorized paths recursively sorted: 8.25% [k] copy_user_generic_string | |--4.31%-- generic_file_aio_read | do_sync_read | vfs_read | | | |--4.19%-- sys_pread64 | | system_call_fastpath | | pread64 | | | --0.12%-- sys_read | system_call_fastpath | __read | |--3.24%-- generic_file_buffered_write | __generic_file_aio_write_nolock | generic_file_aio_write | do_sync_write | reiserfs_file_write | vfs_write | | | |--3.14%-- sys_pwrite64 | | system_call_fastpath | | __pwrite64 | | | --0.10%-- sys_write [...] The command line has then changed. By providing the -c option, the callchain will output in the flat mode by default. But you can override it: perf report -c graph or perf report -c flat You can also pass the abreviated mode: perf report -c g or perf report -c gra will both make use of the graph mode. 
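One point worth spelling out about the graph output: branch percentages are
cumulative. Each node's cumul_hit is its own hit count plus the cumul_hit of
all of its children, and a branch is printed as cumul_hit * 100.0 / total
samples, so sibling branches never sum to more than their parent's figure.
A toy standalone sketch of that arithmetic (numbers loosely modelled on the
example above; nothing here is perf code):

    #include <stdio.h>

    struct node {
            unsigned long long hit;         /* samples ending exactly here */
            struct node *child[2];          /* toy fixed fan-out           */
    };

    static unsigned long long cumul_hit(struct node *n)
    {
            unsigned long long sum = n->hit;
            int i;

            for (i = 0; i < 2; i++)
                    if (n->child[i])
                            sum += cumul_hit(n->child[i]);
            return sum;
    }

    int main(void)
    {
            struct node read_branch  = { 431, { NULL, NULL } };
            struct node write_branch = { 324, { NULL, NULL } };
            struct node copy_user    = {  70, { &read_branch, &write_branch } };
            unsigned long long total_samples = 10000;

            /* (70 + 431 + 324) / 10000 -> prints 8.25% */
            printf("%.2f%%\n", cumul_hit(&copy_user) * 100.0 / total_samples);
            return 0;
    }
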
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 141 +++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/callchain.c | 51 +++++++++++++--- tools/perf/util/callchain.h | 11 +++- 3 files changed, 185 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b44476ca239..0ca46386d93 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,6 +59,7 @@ static regex_t parent_regex; static int exclude_other = 1; static int callchain; +static enum chain_mode callchain_mode; static u64 sample_type; @@ -787,8 +788,103 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) +{ + int i; + size_t ret = 0; + + ret += fprintf(fp, "%s", " "); + + for (i = 0; i < depth; i++) + if (depth_mask & (1 << i)) + ret += fprintf(fp, "| "); + else + ret += fprintf(fp, " "); + + ret += fprintf(fp, "\n"); + + return ret; +} static size_t -callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) +ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, + int depth_mask, int count, u64 total_samples, + int hits) +{ + int i; + size_t ret = 0; + + ret += fprintf(fp, "%s", " "); + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + if (!count && i == depth - 1) { + double percent; + + percent = hits * 100.0 / total_samples; + ret += fprintf(fp, "--%2.2f%%-- ", percent); + } else + ret += fprintf(fp, "%s", " "); + } + if (chain->sym) + ret += fprintf(fp, "%s\n", chain->sym->name); + else + ret += fprintf(fp, "%p\n", (void *)(long)chain->ip); + + return ret; +} + +static size_t +callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples, int depth, int depth_mask) +{ + struct rb_node *node, *next; + struct callchain_node *child; + struct callchain_list *chain; + int new_depth_mask = depth_mask; + size_t ret = 0; + int i; + + node = rb_first(&self->rb_root); + while (node) { + child = rb_entry(node, struct callchain_node, rb_node); + + /* + * The depth mask manages the output of pipes that show + * the depth. 
We don't want to keep the pipes of the current + * level for the last child of this depth + */ + next = rb_next(node); + if (!next) + new_depth_mask &= ~(1 << (depth - 1)); + + /* + * But we keep the older depth mask for the line seperator + * to keep the level link until we reach the last child + */ + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); + i = 0; + list_for_each_entry(chain, &child->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + ret += ipchain__fprintf_graph(fp, chain, depth, + new_depth_mask, i++, + total_samples, + child->cumul_hit); + } + ret += callchain__fprintf_graph(fp, child, total_samples, + depth + 1, + new_depth_mask | (1 << depth)); + node = next; + } + + return ret; +} + +static size_t +callchain__fprintf_flat(FILE *fp, struct callchain_node *self, + u64 total_samples) { struct callchain_list *chain; size_t ret = 0; @@ -796,7 +892,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) if (!self) return 0; - ret += callchain__fprintf(fp, self->parent, total_samples); + ret += callchain__fprintf_flat(fp, self->parent, total_samples); list_for_each_entry(chain, &self->val, list) { @@ -826,8 +922,13 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; - ret += fprintf(fp, " %6.2f%%\n", percent); - ret += callchain__fprintf(fp, chain, total_samples); + if (callchain_mode == FLAT) { + ret += fprintf(fp, " %6.2f%%\n", percent); + ret += callchain__fprintf_flat(fp, chain, total_samples); + } else if (callchain_mode == GRAPH) { + ret += callchain__fprintf_graph(fp, chain, + total_samples, 1, 1); + } ret += fprintf(fp, "\n"); rb_node = rb_next(rb_node); } @@ -1129,8 +1230,12 @@ static void output__insert_entry(struct hist_entry *he) struct rb_node *parent = NULL; struct hist_entry *iter; - if (callchain) - sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); + if (callchain) { + if (callchain_mode == FLAT) + sort_chain_flat(&he->sorted_chain, &he->callchain); + else if (callchain_mode == GRAPH) + sort_chain_graph(&he->sorted_chain, &he->callchain); + } while (*p != NULL) { parent = *p; @@ -1702,6 +1807,26 @@ done: return rc; } +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset __used) +{ + callchain = 1; + + if (!arg) + return 0; + + if (!strncmp(arg, "graph", strlen(arg))) + callchain_mode = GRAPH; + + else if (!strncmp(arg, "flat", strlen(arg))) + callchain_mode = FLAT; + else + return -1; + + return 0; +} + static const char * const report_usage[] = { "perf report [] ", NULL @@ -1725,7 +1850,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), - OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), + OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type", + "Display callchains with output_type: flat, graph. 
" + "Default to flat", &parse_callchain_opt, "flat"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3c4a91fea62..a9873aafcd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -19,9 +19,9 @@ #define chain_for_each_child(child, parent) \ list_for_each_entry(child, &parent->children, brothers) - static void -rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, + enum chain_mode mode) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -31,10 +31,22 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); - if (rnode->hit < chain->hit) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; + switch (mode) { + case FLAT: + if (rnode->hit < chain->hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + break; + case GRAPH: + if (rnode->cumul_hit < chain->cumul_hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + break; + default: + break; + } } rb_link_node(&chain->rb_node, parent, p); @@ -45,15 +57,36 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node) { struct callchain_node *child; chain_for_each_child(child, node) - sort_chain_to_rbtree(rb_root, child); + sort_chain_flat(rb_root, child); if (node->hit) - rb_insert_callchain(rb_root, node); + rb_insert_callchain(rb_root, node, FLAT); +} + +static void __sort_chain_graph(struct callchain_node *node) +{ + struct callchain_node *child; + + node->rb_root = RB_ROOT; + node->cumul_hit = node->hit; + + chain_for_each_child(child, node) { + __sort_chain_graph(child); + rb_insert_callchain(&node->rb_root, child, GRAPH); + node->cumul_hit += child->cumul_hit; + } +} + +void +sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root) +{ + __sort_chain_graph(chain_root); + rb_root->rb_node = chain_root->rb_root.rb_node; } /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index e9bd5e882f3..dfa56008d9a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -6,15 +6,21 @@ #include #include "symbol.h" +enum chain_mode { + FLAT, + GRAPH +}; struct callchain_node { struct callchain_node *parent; struct list_head brothers; struct list_head children; struct list_head val; - struct rb_node rb_node; + struct rb_node rb_node; /* to sort nodes in an rbtree */ + struct rb_root rb_root; /* sorted tree of children */ unsigned int val_nr; u64 hit; + u64 cumul_hit; /* hit + hits of children */ }; struct callchain_list { @@ -32,5 +38,6 @@ static inline void callchain_init(struct callchain_node *node) void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node); #endif -- cgit v1.2.3-70-g09d2 From c20ab37ef30f4a874cf27e84c12c73e584e6f5cc Mon Sep 17 00:00:00 2001 From: 
Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:33 +0200 Subject: perf_counter tools: Set the minimum percent for callchains to be displayed Callchains output may become a burden on a trace because even rarely hit site are exposed. This can be too much information. Let the user set a threshold as a minimum percent of hits using the new pattern for the -c option: -c mode,min_percent Example: $ perf report -s sym -c flat,4 8.25% [k] copy_user_generic_string 4.19% copy_user_generic_string generic_file_aio_read do_sync_read vfs_read sys_pread64 system_call_fastpath pread64 5.39% [k] search_by_key 4.63% 0x00000000009e0a 2.36% [k] memcpy_c [...] $ perf report -s sym -c graph,2 8.25% [k] copy_user_generic_string | |--4.31%-- generic_file_aio_read | do_sync_read | vfs_read | | | --4.19%-- sys_pread64 | system_call_fastpath | pread64 | --3.24%-- generic_file_buffered_write __generic_file_aio_write_nolock generic_file_aio_write do_sync_write reiserfs_file_write vfs_write | --3.14%-- sys_pwrite64 system_call_fastpath __pwrite64 5.39% [k] search_by_key | --2.23%-- reiserfs_update_sd_size 4.63% 0x00000000009e0a 2.36% [k] memcpy_c [...] You can also omit it and it will default to 0. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 45 ++++++++++++++++++++++++++++++++++----------- tools/perf/util/callchain.c | 19 +++++++++++-------- tools/perf/util/callchain.h | 6 ++++-- 3 files changed, 49 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0ca46386d93..e8c98179fe4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -60,6 +60,7 @@ static regex_t parent_regex; static int exclude_other = 1; static int callchain; static enum chain_mode callchain_mode; +static double callchain_min_percent = 0.0; static u64 sample_type; @@ -1224,7 +1225,7 @@ static void collapse__resort(void) static struct rb_root output_hists; -static void output__insert_entry(struct hist_entry *he) +static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) { struct rb_node **p = &output_hists.rb_node; struct rb_node *parent = NULL; @@ -1232,9 +1233,11 @@ static void output__insert_entry(struct hist_entry *he) if (callchain) { if (callchain_mode == FLAT) - sort_chain_flat(&he->sorted_chain, &he->callchain); + sort_chain_flat(&he->sorted_chain, &he->callchain, + min_callchain_hits); else if (callchain_mode == GRAPH) - sort_chain_graph(&he->sorted_chain, &he->callchain); + sort_chain_graph(&he->sorted_chain, &he->callchain, + min_callchain_hits); } while (*p != NULL) { @@ -1251,11 +1254,14 @@ static void output__insert_entry(struct hist_entry *he) rb_insert_color(&he->rb_node, &output_hists); } -static void output__resort(void) +static void output__resort(u64 total_samples) { struct rb_node *next; struct hist_entry *n; struct rb_root *tree = &hist; + u64 min_callchain_hits; + + min_callchain_hits = total_samples * (callchain_min_percent / 100); if (sort__need_collapse) tree = &collapse_hists; @@ -1267,7 +1273,7 @@ static void output__resort(void) next = rb_next(&n->rb_node); rb_erase(&n->rb_node, tree); - output__insert_entry(n); + output__insert_entry(n, min_callchain_hits); } } @@ -1801,7 +1807,7 @@ done: dsos__fprintf(stdout); collapse__resort(); - output__resort(); + output__resort(total); 
output__fprintf(stdout, total); return rc; @@ -1811,19 +1817,36 @@ static int parse_callchain_opt(const struct option *opt __used, const char *arg, int unset __used) { + char *tok; + char *endptr; + callchain = 1; if (!arg) return 0; - if (!strncmp(arg, "graph", strlen(arg))) + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) callchain_mode = GRAPH; - else if (!strncmp(arg, "flat", strlen(arg))) + else if (!strncmp(tok, "flat", strlen(arg))) callchain_mode = FLAT; else return -1; + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + return 0; + + callchain_min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + return 0; } @@ -1850,9 +1873,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type", - "Display callchains with output_type: flat, graph. " - "Default to flat", &parse_callchain_opt, "flat"), + OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type,min_percent", + "Display callchains using output_type and min percent threshold. " + "Default: flat,0", &parse_callchain_opt, "flat,100"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a9873aafcd9..c9900fe6b8b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -57,18 +57,19 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node) +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit) { struct callchain_node *child; chain_for_each_child(child, node) - sort_chain_flat(rb_root, child); + sort_chain_flat(rb_root, child, min_hit); - if (node->hit) + if (node->hit && node->hit >= min_hit) rb_insert_callchain(rb_root, node, FLAT); } -static void __sort_chain_graph(struct callchain_node *node) +static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) { struct callchain_node *child; @@ -76,16 +77,18 @@ static void __sort_chain_graph(struct callchain_node *node) node->cumul_hit = node->hit; chain_for_each_child(child, node) { - __sort_chain_graph(child); - rb_insert_callchain(&node->rb_root, child, GRAPH); + __sort_chain_graph(child, min_hit); + if (child->cumul_hit >= min_hit) + rb_insert_callchain(&node->rb_root, child, GRAPH); node->cumul_hit += child->cumul_hit; } } void -sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root) +sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit) { - __sort_chain_graph(chain_root); + __sort_chain_graph(chain_root, min_hit); rb_root->rb_node = chain_root->rb_root.rb_node; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index dfa56008d9a..f3e4776e743 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -38,6 +38,8 @@ static inline void callchain_init(struct callchain_node *node) void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node); -void 
sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit); +void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit); #endif -- cgit v1.2.3-70-g09d2 From 1e11fd82d247e4e48a1d6c49402214434538d3fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:34 +0200 Subject: perf_counter tools: Provide helper to print percents color Among perf annotate, perf report and perf top, we can find the common colored printing of percents according to the following rules: High overhead = > 5%, colored in red Mid overhead = > 0.5%, colored in green Low overhead = < 0.5%, default color Factorize these multiple checks in a single function named percent_color_fprintf() and also provide a get_percent_color() for sites which print percentages and other things at the same time. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 26 ++------------------------ tools/perf/builtin-report.c | 21 +++------------------ tools/perf/builtin-top.c | 15 +-------------- tools/perf/util/color.c | 27 +++++++++++++++++++++++++++ tools/perf/util/color.h | 5 +++++ 5 files changed, 38 insertions(+), 56 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 08ea6c5329d..5f9eefecc57 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,10 +25,6 @@ #define SHOW_USER 2 #define SHOW_HV 4 -#define MIN_GREEN 0.5 -#define MIN_RED 5.0 - - static char const *input_name = "perf.data"; static char *vmlinux = "vmlinux"; @@ -1047,24 +1043,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static char *get_color(double percent) -{ - char *color = PERF_COLOR_NORMAL; - - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= MIN_RED) - color = PERF_COLOR_RED; - else { - if (percent > MIN_GREEN) - color = PERF_COLOR_GREEN; - } - return color; -} - static int parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) { @@ -1126,7 +1104,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; - color = get_color(percent); + color = get_percent_color(percent); /* * Also color the filename and line if needed, with @@ -1262,7 +1240,7 @@ static void print_summary(char *filename) sym_ext = rb_entry(node, struct sym_ext, node); percent = sym_ext->percent; - color = get_color(percent); + color = get_percent_color(percent); path = sym_ext->path; color_fprintf(stdout, color, " %7.2f %s", percent, path); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e8c98179fe4..c9dbe331549 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -947,25 +947,10 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) if (exclude_other && !self->parent) return 0; - if (total_samples) { - double percent = self->count * 100.0 / total_samples; - char *color = PERF_COLOR_NORMAL; - - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= 
5.0) { - color = PERF_COLOR_RED; - } else { - if (percent >= 0.5) - color = PERF_COLOR_GREEN; - } - - ret = color_fprintf(fp, color, " %6.2f%%", + if (total_samples) + ret = percent_color_fprintf(fp, " %6.2f%%", (self->count * 100.0) / total_samples); - } else + else ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index aa044ea1482..95d5c0ae375 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -239,7 +239,6 @@ static void print_sym_table(void) for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); struct symbol *sym = (struct symbol *)(syme + 1); - char *color = PERF_COLOR_NORMAL; double pcnt; if (++printed > print_entries || syme->snap_count < count_filter) @@ -248,24 +247,12 @@ static void print_sym_table(void) pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (pcnt >= 5.0) { - color = PERF_COLOR_RED; - } else { - if (pcnt >= 0.5) - color = PERF_COLOR_GREEN; - } - if (nr_counters == 1) printf("%20.2f - ", syme->weight); else printf("%9.1f %10ld - ", syme->weight, syme->snap_count); - color_fprintf(stdout, color, "%4.1f%%", pcnt); + percent_color_fprintf(stdout, "%4.1f%%", pcnt); printf(" - %016llx : %s", sym->start, sym->name); if (sym->module) printf("\t[%s]", sym->module->name); diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 26f82318b86..90a044d1fe7 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -242,4 +242,31 @@ int color_fwrite_lines(FILE *fp, const char *color, return 0; } +char *get_percent_color(double percent) +{ + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: + */ + if (percent >= MIN_RED) + color = PERF_COLOR_RED; + else { + if (percent > MIN_GREEN) + color = PERF_COLOR_GREEN; + } + return color; +} +int percent_color_fprintf(FILE *fp, const char *fmt, double percent) +{ + int r; + char *color; + + color = get_percent_color(percent); + r = color_fprintf(fp, color, fmt, percent); + + return r; +} diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 5abfd379582..706cec50bd2 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -15,6 +15,9 @@ #define PERF_COLOR_CYAN "\033[36m" #define PERF_COLOR_BG_RED "\033[41m" +#define MIN_GREEN 0.5 +#define MIN_RED 5.0 + /* * This variable stores the value of color.ui */ @@ -32,5 +35,7 @@ void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int percent_color_fprintf(FILE *fp, const char *fmt, double percent); +char *get_percent_color(double percent); #endif /* COLOR_H */ -- cgit v1.2.3-70-g09d2 From 24b57c6988c5791628c89a8838910991abc9cc1e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:35 +0200 Subject: perf_counter tools: Display percents of hits in callchain with overhead colors This adds the use of colors to signal at a glance the important overhead thresholds in callchains hit rates. 
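(Illustrative sketch, not part of the patch: the thresholds are the MIN_RED/MIN_GREEN
constants introduced by the previous change, so each callchain percentage ends up being
printed roughly as

        const char *color = PERF_COLOR_NORMAL;

        if (percent >= MIN_RED)                 /* >= 5.0%: red   */
                color = PERF_COLOR_RED;
        else if (percent > MIN_GREEN)           /*  > 0.5%: green */
                color = PERF_COLOR_GREEN;

        color_fprintf(fp, color, "--%2.2f%%-- ", percent);

which is what the percent_color_fprintf() helper wraps up.)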
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c9dbe331549..283773d91aa 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -824,7 +824,7 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, double percent; percent = hits * 100.0 / total_samples; - ret += fprintf(fp, "--%2.2f%%-- ", percent); + ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); } else ret += fprintf(fp, "%s", " "); } @@ -924,7 +924,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; if (callchain_mode == FLAT) { - ret += fprintf(fp, " %6.2f%%\n", percent); + ret += percent_color_fprintf(fp, " %6.2f%%\n", + percent); ret += callchain__fprintf_flat(fp, chain, total_samples); } else if (callchain_mode == GRAPH) { ret += callchain__fprintf_graph(fp, chain, -- cgit v1.2.3-70-g09d2 From 2d4618dce6a318ff4ec78dfe492cc3793015d540 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 23 Jun 2009 21:38:49 -0400 Subject: parisc: perf: wire up sys_perf_counter_open Signed-off-by: Kyle McMartin --- arch/parisc/Kconfig | 1 + arch/parisc/include/asm/perf_counter.h | 7 +++++++ arch/parisc/kernel/syscall_table.S | 2 +- tools/perf/perf.h | 6 ++++++ 4 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 arch/parisc/include/asm/perf_counter.h (limited to 'tools') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 9038f39d9d7..c38bbc7af79 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG + select HAVE_PERF_COUNTERS help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/arch/parisc/include/asm/perf_counter.h b/arch/parisc/include/asm/perf_counter.h new file mode 100644 index 00000000000..dc9e829f701 --- /dev/null +++ b/arch/parisc/include/asm/perf_counter.h @@ -0,0 +1,7 @@ +#ifndef __ASM_PARISC_PERF_COUNTER_H +#define __ASM_PARISC_PERF_COUNTER_H + +/* parisc only supports software counters through this interface. 
*/ +static inline void set_perf_counter_pending(void) { } + +#endif /* __ASM_PARISC_PERF_COUNTER_H */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 0c83673a03a..cf145eb026b 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -416,7 +416,7 @@ ENTRY_COMP(preadv) /* 315 */ ENTRY_COMP(pwritev) ENTRY_COMP(rt_tgsigqueueinfo) - ENTRY_SAME(ni_syscall) + ENTRY_SAME(perf_counter_open) /* Nothing yet */ diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ceb68aa51f7..188801b4ebc 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,6 +19,12 @@ #define cpu_relax() asm volatile("" ::: "memory"); #endif +#ifdef __hppa__ +#include "../../arch/parisc/include/asm/unistd.h" +#define rmb() asm volatile("" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory"); +#endif + #include #include #include -- cgit v1.2.3-70-g09d2 From 30d7a77dd5a9720430af72f6f62f5156fe073e55 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Jul 2009 21:24:14 -0300 Subject: perf_counter tools: Adjust symbols in ET_EXEC files too Ingo Molnar wrote: > i just bisected a 'perf report' bug that would cause us to not > resolve all user-space symbols in a 'git gc' run to: > > f5812a7a336fb952d819e4427b9a2dce02368e82 is first bad commit > commit f5812a7a336fb952d819e4427b9a2dce02368e82 > Author: Arnaldo Carvalho de Melo > Date: Tue Jun 30 11:43:17 2009 -0300 > > perf_counter tools: Adjust only prelinked symbol's addresses Rename ->prelinked to ->adjust_symbols and making what was done only for prelinked libraries also to ET_EXEC binaries, such as /usr/bin/git: [acme@doppio pahole]$ readelf -h /usr/bin/git | grep Type Type: EXEC (Executable file) [acme@doppio pahole]$ And after installing the 'git-debuginfo' package, I get correct results: [acme@doppio linux-2.6-tip]$ perf report --sort comm,dso,symbol -d /usr/bin/git | head -20 # # (1139614 samples) # # Overhead Command Shared Object Symbol # ........ ................ ......................... ...... # 34.98% git /usr/bin/git [.] send_sideband 33.39% git /usr/bin/git [.] enter_repo 6.81% git /usr/bin/git [.] diff_opt_parse 4.95% git /usr/bin/git [.] is_repository_shallow 3.24% git /usr/bin/git [.] odb_mkstemp 1.39% git /usr/bin/git [.] output 1.34% git /usr/bin/git [.] xmmap 1.25% git /usr/bin/git [.] receive_pack_config 1.16% git /usr/bin/git [.] git_pathdup 0.90% git /usr/bin/git [.] read_object_with_reference 0.86% git /usr/bin/git [.] show_patch_diff 0.85% git /usr/bin/git 0x00000000095e2e 0.69% git /usr/bin/git [.] display [acme@doppio linux-2.6-tip]$ I'll check what are the last cases where we can't resolve symbols, like this 0x00000000095e2e later. 
And I guess this will fix the problems Mike were seeing too: [acme@doppio linux-2.6-tip]$ readelf -h ../build/perf/vmlinux | grep Type Type: EXEC (Executable file) [acme@doppio linux-2.6-tip]$ Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 11 ++++++----- tools/perf/util/symbol.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 98a13114de7..4683b67b5ee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -555,9 +555,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL; + self->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; @@ -580,7 +581,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, section_name = elf_sec__name(&shdr, secstrs); obj_start = sym.st_value; - if (self->prelinked) { + if (self->adjust_symbols) { if (verbose >= 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); @@ -632,7 +633,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) if (!name) return -1; - self->prelinked = 0; + self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) return dso__load_perf_map(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 4e141a30911..7918cffb23c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -24,7 +24,7 @@ struct dso { struct rb_root syms; struct symbol *(*find_symbol)(struct dso *, u64 ip); unsigned int sym_priv_size; - unsigned char prelinked; + unsigned char adjust_symbols; char name[0]; }; -- cgit v1.2.3-70-g09d2 From 029e5b1636d0511ef143af3a20c83c48e44c03f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Jul 2009 13:17:28 +0200 Subject: perf report: Annotate variable initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain versions of GCC dont see the initialization that is done here: builtin-report.c: In function ‘__cmd_report’: builtin-report.c:1038: warning: ‘syms’ may be used uninitialized in this function So annotate it with a NULL initialization. 
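(For illustration only - a simplified sketch of the pattern, not the exact code: the
allocation and the later use are guarded by the same runtime condition, which some gcc
versions cannot prove, e.g.

        struct symbol **syms = NULL;    /* explicit init quiets the warning */

        if (callchain)
                syms = calloc(chain->nr, sizeof(*syms));

        /* ... syms is only dereferenced on paths where callchain is set ... */

        if (callchain)
                free(syms);

The "= NULL" only silences the false positive; it does not change behaviour on any
path that actually uses the pointer.)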
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 283773d91aa..fa937f5c3c3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1035,7 +1035,7 @@ resolve_callchain(struct thread *thread, struct map *map __used, struct ip_callchain *chain, struct hist_entry *entry) { u64 context = PERF_CONTEXT_MAX; - struct symbol **syms; + struct symbol **syms = NULL; unsigned int i; if (callchain) { -- cgit v1.2.3-70-g09d2 From 91b4eaea93f5be95f4477554399680a53aff2343 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:17 +0200 Subject: perf report: Warn on callchain output request from non-callchain file perf report segfaults while trying to handle callchains from a non callchain data file. Instead of a segfault, print a useful message to the user. Reported-by: Jens Axboe Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa937f5c3c3..9f9575afab0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1684,9 +1684,19 @@ static int __cmd_report(void) sample_type = perf_header__sample_type(); - if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { - fprintf(stderr, "selected --sort parent, but no callchain data\n"); - exit(-1); + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (sort__has_parent) { + fprintf(stderr, "selected --sort parent, but no" + " callchain data. Did you call" + " perf record without -g?\n"); + exit(-1); + } + if (callchain) { + fprintf(stderr, "selected -c but no callchain data." + " Did you call perf record without" + " -g?\n"); + exit(-1); + } } if (load_kernel() < 0) { -- cgit v1.2.3-70-g09d2 From be9038859e56f729cc9d3b070a35fb8829a73696 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:18 +0200 Subject: perf report: Use a modifiable string for default callchain options If the user doesn't provide options to tune his callchain output (ie: if he uses -c without arguments) then the default value passed in the OPT_CALLBACK_DEFAULT() macro is used. But it's parsed later by strtok() which will replace comma separators to a zero. This may segfault as we are using a read-only string. Use a modifiable one instead, and also fix the "100%" default minimum threshold value by turning it into a 0 (output every callchains) as it was intended in the origin. 
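(A minimal illustration of the crash, not code quoted from the tool: the previous
default was a string literal, and strtok() modifies its argument in place,

        const char *def = "flat,100";   /* string literal: read-only storage  */
        strtok((char *)def, ",");       /* writes '\0' over the ',' -> SIGSEGV */

so the default has to live in a writable array instead:

        static char callchain_default_opt[] = "flat,0";
        strtok(callchain_default_opt, ",");     /* safe: the array is modifiable */

which is what the change below does, while also correcting the default threshold to 0.)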
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9f9575afab0..3db99fd9986 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -58,6 +58,8 @@ static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; static int exclude_other = 1; + +static char callchain_default_opt[] = "flat,0"; static int callchain; static enum chain_mode callchain_mode; static double callchain_min_percent = 0.0; @@ -1871,7 +1873,7 @@ static const struct option options[] = { "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type,min_percent", "Display callchains using output_type and min percent threshold. " - "Default: flat,0", &parse_callchain_opt, "flat,100"), + "Default: flat,0", &parse_callchain_opt, callchain_default_opt), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", -- cgit v1.2.3-70-g09d2 From 94a8eb028a57854157a936c7e66b09e2559f115a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:19 +0200 Subject: perf report: Change default callchain parameters The default callchain parameters are set to use the flat mode and never filter any overhead threshold of backtrace. But flat mode is boring compared to graph mode. Also the number of callchains may be very high if none is filtered. Let's change this to set the graph view and a minimum overhead of 0.5% as default parameters. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3db99fd9986..8bd58651128 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,10 +59,10 @@ static regex_t parent_regex; static int exclude_other = 1; -static char callchain_default_opt[] = "flat,0"; +static char callchain_default_opt[] = "graph,0.5"; static int callchain; static enum chain_mode callchain_mode; -static double callchain_min_percent = 0.0; +static double callchain_min_percent = 0.5; static u64 sample_type; -- cgit v1.2.3-70-g09d2 From e05b876c222178bc6abcfa9f23d8311731691046 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:20 +0200 Subject: perf_counter tools: callchains: Manage the cumul hits on the fly The cumul hits are the number of hits of every childs of a node plus the hits of the current nodes, required for percentage computing of a branch. Theses numbers are calculated during the sorting of the branches of the callchain tree using a depth first postfix traversal, so that cumulative hits are propagated in the right order. 
But if we plan to implement percentages relative to the parent and not absolute percentages (relative to the whole overhead), we need to know the cumulative hits of the parent before computing the children because the relative minimum acceptable number of entries (ie: minimum rate against the cumulative hits from the parent) is the basis to filter the children against a given rate. Then we need to handle the cumul hits on the fly to prepare the implementation of relative overhead rates. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c9900fe6b8b..5d244afb7cd 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -74,13 +74,11 @@ static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) struct callchain_node *child; node->rb_root = RB_ROOT; - node->cumul_hit = node->hit; chain_for_each_child(child, node) { __sort_chain_graph(child, min_hit); if (child->cumul_hit >= min_hit) rb_insert_callchain(&node->rb_root, child, GRAPH); - node->cumul_hit += child->cumul_hit; } } @@ -159,7 +157,7 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->hit = 1; + new->cumul_hit = new->hit = 1; } /* @@ -189,6 +187,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; + new->cumul_hit = parent->cumul_hit; new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -216,10 +215,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - return; + goto cumul; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); + +cumul: + root->cumul_hit++; } static int @@ -261,6 +263,8 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; + root->cumul_hit++; + return 0; } -- cgit v1.2.3-70-g09d2 From 805d127d62472f17c7d79baa001a7651afe2fa47 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:21 +0200 Subject: perf report: Add "Fractal" mode output - support callchains with relative overhead rate The current callchain displays the overhead rates as absolute: relative to the total overhead. This patch provides relative overhead percentage, in which each branch of the callchain tree is a independant instrumentated object. This provides a 'fractal' view of the call-chain profile: each sub-graph looks like a profile in itself - relative to its parent. You can produce such output by using the "fractal" mode that you can abbreviate via f, fr, fra, frac, etc... 
./perf report -s sym -c fractal Example: 8.46% [k] copy_user_generic_string | |--52.01%-- generic_file_aio_read | do_sync_read | vfs_read | | | |--97.20%-- sys_pread64 | | system_call_fastpath | | pread64 | | | --2.81%-- sys_read | system_call_fastpath | __read | |--39.85%-- generic_file_buffered_write | __generic_file_aio_write_nolock | generic_file_aio_write | do_sync_write | reiserfs_file_write | vfs_write | | | |--97.05%-- sys_pwrite64 | | system_call_fastpath | | __pwrite64 | | | --2.95%-- sys_write | system_call_fastpath | __write_nocancel [...] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 60 +++++++++++++++++++++----------- tools/perf/util/callchain.c | 84 +++++++++++++++++++++++++++++++++++++-------- tools/perf/util/callchain.h | 21 ++++++++---- 3 files changed, 124 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8bd58651128..4e5cc266311 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,10 +59,15 @@ static regex_t parent_regex; static int exclude_other = 1; -static char callchain_default_opt[] = "graph,0.5"; +static char callchain_default_opt[] = "fractal,0.5"; + static int callchain; -static enum chain_mode callchain_mode; -static double callchain_min_percent = 0.5; + +static +struct callchain_param callchain_param = { + .mode = CHAIN_GRAPH_ABS, + .min_percent = 0.5 +}; static u64 sample_type; @@ -846,9 +851,15 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, struct callchain_node *child; struct callchain_list *chain; int new_depth_mask = depth_mask; + u64 new_total; size_t ret = 0; int i; + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = self->cumul_hit; + else + new_total = total_samples; + node = rb_first(&self->rb_root); while (node) { child = rb_entry(node, struct callchain_node, rb_node); @@ -873,10 +884,10 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, continue; ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, - total_samples, + new_total, child->cumul_hit); } - ret += callchain__fprintf_graph(fp, child, total_samples, + ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, new_depth_mask | (1 << depth)); node = next; @@ -925,13 +936,18 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; - if (callchain_mode == FLAT) { + switch (callchain_param.mode) { + case CHAIN_FLAT: ret += percent_color_fprintf(fp, " %6.2f%%\n", percent); ret += callchain__fprintf_flat(fp, chain, total_samples); - } else if (callchain_mode == GRAPH) { + break; + case CHAIN_GRAPH_ABS: /* Falldown */ + case CHAIN_GRAPH_REL: ret += callchain__fprintf_graph(fp, chain, total_samples, 1, 1); + default: + break; } ret += fprintf(fp, "\n"); rb_node = rb_next(rb_node); @@ -1219,14 +1235,9 @@ static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) struct rb_node *parent = NULL; struct hist_entry *iter; - if (callchain) { - if (callchain_mode == FLAT) - sort_chain_flat(&he->sorted_chain, &he->callchain, - min_callchain_hits); - else if (callchain_mode == GRAPH) - sort_chain_graph(&he->sorted_chain, &he->callchain, - min_callchain_hits); - } + 
if (callchain) + callchain_param.sort(&he->sorted_chain, &he->callchain, + min_callchain_hits, &callchain_param); while (*p != NULL) { parent = *p; @@ -1249,7 +1260,7 @@ static void output__resort(u64 total_samples) struct rb_root *tree = &hist; u64 min_callchain_hits; - min_callchain_hits = total_samples * (callchain_min_percent / 100); + min_callchain_hits = total_samples * (callchain_param.min_percent / 100); if (sort__need_collapse) tree = &collapse_hists; @@ -1829,22 +1840,31 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, /* get the output mode */ if (!strncmp(tok, "graph", strlen(arg))) - callchain_mode = GRAPH; + callchain_param.mode = CHAIN_GRAPH_ABS; else if (!strncmp(tok, "flat", strlen(arg))) - callchain_mode = FLAT; + callchain_param.mode = CHAIN_FLAT; + + else if (!strncmp(tok, "fractal", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_REL; + else return -1; /* get the min percentage */ tok = strtok(NULL, ","); if (!tok) - return 0; + goto setup; - callchain_min_percent = strtod(tok, &endptr); + callchain_param.min_percent = strtod(tok, &endptr); if (tok == endptr) return -1; +setup: + if (register_callchain_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain params\n"); + return -1; + } return 0; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5d244afb7cd..9d3c8141b8c 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -32,13 +32,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, rnode = rb_entry(parent, struct callchain_node, rb_node); switch (mode) { - case FLAT: + case CHAIN_FLAT: if (rnode->hit < chain->hit) p = &(*p)->rb_left; else p = &(*p)->rb_right; break; - case GRAPH: + case CHAIN_GRAPH_ABS: /* Falldown */ + case CHAIN_GRAPH_REL: if (rnode->cumul_hit < chain->cumul_hit) p = &(*p)->rb_left; else @@ -53,43 +54,96 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, rb_insert_color(&chain->rb_node, root); } +static void +__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit) +{ + struct callchain_node *child; + + chain_for_each_child(child, node) + __sort_chain_flat(rb_root, child, min_hit); + + if (node->hit && node->hit >= min_hit) + rb_insert_callchain(rb_root, node, CHAIN_FLAT); +} + /* * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit) +static void +sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit, struct callchain_param *param __used) +{ + __sort_chain_flat(rb_root, node, min_hit); +} + +static void __sort_chain_graph_abs(struct callchain_node *node, + u64 min_hit) { struct callchain_node *child; - chain_for_each_child(child, node) - sort_chain_flat(rb_root, child, min_hit); + node->rb_root = RB_ROOT; - if (node->hit && node->hit >= min_hit) - rb_insert_callchain(rb_root, node, FLAT); + chain_for_each_child(child, node) { + __sort_chain_graph_abs(child, min_hit); + if (child->cumul_hit >= min_hit) + rb_insert_callchain(&node->rb_root, child, + CHAIN_GRAPH_ABS); + } +} + +static void +sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit, struct callchain_param *param __used) +{ + __sort_chain_graph_abs(chain_root, min_hit); + rb_root->rb_node = chain_root->rb_root.rb_node; } -static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) +static void 
__sort_chain_graph_rel(struct callchain_node *node, + double min_percent) { struct callchain_node *child; + u64 min_hit; node->rb_root = RB_ROOT; + min_hit = node->cumul_hit * min_percent / 100.0; chain_for_each_child(child, node) { - __sort_chain_graph(child, min_hit); + __sort_chain_graph_rel(child, min_percent); if (child->cumul_hit >= min_hit) - rb_insert_callchain(&node->rb_root, child, GRAPH); + rb_insert_callchain(&node->rb_root, child, + CHAIN_GRAPH_REL); } } -void -sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root, - u64 min_hit) +static void +sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph(chain_root, min_hit); + __sort_chain_graph_rel(chain_root, param->min_percent); rb_root->rb_node = chain_root->rb_root.rb_node; } +int register_callchain_param(struct callchain_param *param) +{ + switch (param->mode) { + case CHAIN_GRAPH_ABS: + param->sort = sort_chain_graph_abs; + break; + case CHAIN_GRAPH_REL: + param->sort = sort_chain_graph_rel; + break; + case CHAIN_FLAT: + param->sort = sort_chain_flat; + break; + default: + return -1; + } + return 0; +} + /* * Create a child for a parent. If inherit_children, then the new child * will become the new parent of it's parent children diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index f3e4776e743..7812122bea1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,8 +7,9 @@ #include "symbol.h" enum chain_mode { - FLAT, - GRAPH + CHAIN_FLAT, + CHAIN_GRAPH_ABS, + CHAIN_GRAPH_REL }; struct callchain_node { @@ -23,6 +24,17 @@ struct callchain_node { u64 cumul_hit; /* hit + hits of children */ }; +struct callchain_param; + +typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, + u64, struct callchain_param *); + +struct callchain_param { + enum chain_mode mode; + double min_percent; + sort_chain_func_t sort; +}; + struct callchain_list { u64 ip; struct symbol *sym; @@ -36,10 +48,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } +int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit); -void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit); #endif -- cgit v1.2.3-70-g09d2 From 9590b7ba3fefdfe0c7741f5e2f61faf2ffcea19c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 6 Jul 2009 22:01:31 +1000 Subject: perf_counter tools: Rename cache events to remove $ The cache events contain '$' which will hit shell variable expansion. To avoid confusion change this to 'cache', ie L1-d$-loads becomes L1-dcache-loads. 
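(For example - illustrative only, since the exact expansion depends on the shell's
current option flags - an unquoted "perf stat -e L1-d$-loads" on a bash command line
has the "$-" expanded before perf ever sees the argument, so the tool receives
something like "L1-dhimBHsloads" rather than the event name the user typed.)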
Signed-off-by: Anton Blanchard Cc: Roland Dreier Cc: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <20090706120131.GB4391@kryten> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5184959e061..518a33affe1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,8 +71,8 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-d$", "l1-d", "l1d", "L1-data", }, - { "L1-i$", "l1-i", "l1i", "L1-instruction", }, + { "L1-dcache", "l1-d", "l1d", "L1-data", }, + { "L1-icache", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2" }, { "dTLB", "d-tlb", "Data-TLB", }, { "iTLB", "i-tlb", "Instruction-TLB", }, -- cgit v1.2.3-70-g09d2 From 11d1578f9454159c43499d1d8fe8a7d728c176a3 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Wed, 8 Jul 2009 17:46:14 -0400 Subject: perf_counter: Add P6 PMU support Add basic P6 PMU support. The P6 uses the EVNTSEL0 EN bit to enable/disable both its counters. We use this for the global enable/disable, and clear all config bits (except EN) to disable individual counters. Actual ia32 hardware doesn't support lfence, so use a locked op without side-effect to implement a full barrier. perf stat and perf record seem to function correctly. [a.p.zijlstra@chello.nl: cleanups and complete the enable/disable code] Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 227 ++++++++++++++++++++++++++++++++++--- tools/perf/perf.h | 8 +- 2 files changed, 220 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 36c3dc7b899..1910f39ff19 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -65,6 +65,44 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int event) +{ + return p6_perfmon_event_map[event]; +} + +static u64 p6_pmu_raw_event(u64 event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_COUNTER_MASK) + + return event & P6_EVNTSEL_MASK; +} + + /* * Intel PerfMon v3. Used on Core2 and later. 
*/ @@ -726,6 +764,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return 0; } +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_disable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -767,6 +822,23 @@ void hw_perf_disable(void) return x86_pmu.disable_all(); } +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_enable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); @@ -819,16 +891,13 @@ static inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, - hwc->config); + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void @@ -836,13 +905,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; - int err; mask = 0xfULL << (idx * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - err = checking_wrmsrl(hwc->config_base, ctrl_val); + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void +p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE; + + if (!cpuc->enabled) + val = 0; + + (void)checking_wrmsrl(hwc->config_base + idx, val); } static inline void @@ -943,6 +1023,17 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } +static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + x86_pmu_enable_counter(hwc, idx); + else + x86_pmu_disable_counter(hwc, idx); +} + + static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { @@ -1176,6 +1267,49 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_counters *cpuc; + struct perf_counter *counter; + struct hw_perf_counter *hwc; + int idx, handled = 0; + u64 val; + + data.regs = regs; + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_counters); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + counter = cpuc->counters[idx]; + hwc = &counter->hw; + + val = x86_perf_counter_update(counter, hwc, idx); + if (val & (1ULL << (x86_pmu.counter_bits - 1))) + continue; + + /* + * counter overflow + */ + handled = 1; + data.period = 
counter->hw.last_period; + + if (!x86_perf_counter_set_period(counter, hwc, idx)) + continue; + + if (perf_counter_overflow(counter, 1, &data)) + p6_pmu_disable_counter(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} /* * This handler is triggered by the local APIC, so the APIC IRQ handling @@ -1185,14 +1319,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; - int bit, cpu, loops; + int bit, loops; u64 ack, status; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); status = intel_pmu_get_status(); @@ -1249,14 +1382,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; - int cpu, idx, handled = 0; + int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) @@ -1353,6 +1485,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_counter, + .disable = p6_pmu_disable_counter, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_counters = 2, + /* + * Counters have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a counter for P6-like PMU is 32 bits only. 
+ * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .counter_bits = 32, + .counter_mask = (1ULL << 32) - 1, +}; + static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1392,6 +1550,41 @@ static struct x86_pmu amd_pmu = { .max_period = (1ULL << 47) - 1, }; +static int p6_pmu_init(void) +{ + int high, low; + + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* for Pentium M, we need to check if PMU exist */ + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (low & MSR_IA32_MISC_ENABLE_EMON) + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + if (!cpu_has_apic) { + pr_info("no Local APIC, try rebooting with lapic"); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + return 0; +} + static int intel_pmu_init(void) { union cpuid10_edx edx; @@ -1400,8 +1593,14 @@ static int intel_pmu_init(void) unsigned int ebx; int version; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { return -ENODEV; + } + } /* * Check whether the Architectural PerfMon supports diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 27887c91643..53bb9550def 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -1,7 +1,13 @@ #ifndef _PERF_PERF_H #define _PERF_PERF_H -#if defined(__x86_64__) || defined(__i386__) +#if defined(__i386__) +#include "../../arch/x86/include/asm/unistd.h" +#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#if defined(__x86_64__) #include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); -- cgit v1.2.3-70-g09d2 From 52d422de22b26d96bbdfbc605eb31c2994df6d0b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 10 Jul 2009 22:47:28 -0300 Subject: perf report: Adjust column width to the values sampled Auto-adjust column width of perf report output to the longest occuring string length. Example: [acme@doppio pahole]$ perf report --sort comm,dso,symbol | head -13 12.79% pahole /usr/lib64/libdw-0.141.so [.] __libdw_find_attr 8.90% pahole /lib64/libc-2.10.1.so [.] _int_malloc 8.68% pahole /usr/lib64/libdw-0.141.so [.] __libdw_form_val_len 8.15% pahole /lib64/libc-2.10.1.so [.] __GI_strcmp 6.80% pahole /lib64/libc-2.10.1.so [.] __tsearch 5.54% pahole ./build/libdwarves.so.1.0.0 [.] tag__recode_dwarf_type [acme@doppio pahole]$ [acme@doppio pahole]$ perf report --sort comm,dso,symbol -d /lib64/libc-2.10.1.so | head -10 21.92% pahole /lib64/libc-2.10.1.so [.] _int_malloc 20.08% pahole /lib64/libc-2.10.1.so [.] __GI_strcmp 16.75% pahole /lib64/libc-2.10.1.so [.] __tsearch [acme@doppio pahole]$ Also add these extra options to control the new behaviour: -w, --field-width Force each column width to the provided list, for large terminal readability. -t, --field-separator: Use a special separator character and don't pad with spaces, replacing all occurances of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. 
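(Usage illustration, not from the original changelog: with a field separator the
output becomes easy to post-process, e.g.

        $ perf report -t ';' -s comm,dso,symbol > report.txt

emits one record per line as overhead;comm;dso;symbol with no column padding; any ';'
that happens to occur inside a printed name is rewritten to '.' so the field count
stays fixed, which is also why '.' itself is rejected as a separator.)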
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090711014728.GH3452@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 12 +++ tools/perf/builtin-report.c | 174 +++++++++++++++++++++++++------ tools/perf/util/include/linux/kernel.h | 8 ++ tools/perf/util/symbol.c | 1 + tools/perf/util/symbol.h | 1 + 5 files changed, 164 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8aa3f8c8870..05774dfbd14 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -33,6 +33,18 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. +-w:: +--field-width=:: + Force each column width to the provided list, for large terminal + readability. + +-t:: +--field-separator=:: + + Use a special separator character and don't pad with spaces, replacing + all occurances of this separator in symbol names (and other output) + with a '.' character, that thus it's the only non valid separator. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4e5cc266311..740da43f313 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,10 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str, *comm_list_str, *sym_list_str; +static char *dso_list_str, *comm_list_str, *sym_list_str, + *col_width_list_str; static struct strlist *dso_list, *comm_list, *sym_list; +static char *field_sep; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -129,6 +131,33 @@ typedef union event_union { struct read_event read; } event_t; +static int repsep_fprintf(FILE *fp, const char *fmt, ...) +{ + int n; + va_list ap; + + va_start(ap, fmt); + if (!field_sep) + n = vfprintf(fp, fmt, ap); + else { + char *bf = NULL; + n = vasprintf(&bf, fmt, ap); + if (n > 0) { + char *sep = bf; + while (1) { + sep = strchr(sep, *field_sep); + if (sep == NULL) + break; + *sep = '.'; + } + } + fputs(bf, fp); + free(bf); + } + va_end(ap); + return n; +} + static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; @@ -360,12 +389,28 @@ static struct thread *thread__new(pid_t pid) return self; } +static unsigned int dsos__col_width, + comms__col_width, + threads__col_width; + static int thread__set_comm(struct thread *self, const char *comm) { if (self->comm) free(self->comm); self->comm = strdup(comm); - return self->comm ? 
0 : -ENOMEM; + if (!self->comm) + return -ENOMEM; + + if (!col_width_list_str && !field_sep && + (!comm_list || strlist__has_entry(comm_list, comm))) { + unsigned int slen = strlen(comm); + if (slen > comms__col_width) { + comms__col_width = slen; + threads__col_width = slen + 6; + } + } + + return 0; } static size_t thread__fprintf(struct thread *self, FILE *fp) @@ -536,7 +581,8 @@ struct sort_entry { int64_t (*cmp)(struct hist_entry *, struct hist_entry *); int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *); + size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); + unsigned int *width; }; static int64_t cmp_null(void *l, void *r) @@ -558,15 +604,17 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__thread_print(FILE *fp, struct hist_entry *self) +sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) { - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); + return repsep_fprintf(fp, "%*s:%5d", width - 6, + self->thread->comm ?: "", self->thread->pid); } static struct sort_entry sort_thread = { - .header = " Command: Pid", + .header = "Command: Pid", .cmp = sort__thread_cmp, .print = sort__thread_print, + .width = &threads__col_width, }; /* --sort comm */ @@ -590,16 +638,17 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__comm_print(FILE *fp, struct hist_entry *self) +sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) { - return fprintf(fp, "%16s", self->thread->comm); + return repsep_fprintf(fp, "%*s", width, self->thread->comm); } static struct sort_entry sort_comm = { - .header = " Command", + .header = "Command", .cmp = sort__comm_cmp, .collapse = sort__comm_collapse, .print = sort__comm_print, + .width = &comms__col_width, }; /* --sort dso */ @@ -617,18 +666,19 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__dso_print(FILE *fp, struct hist_entry *self) +sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) { if (self->dso) - return fprintf(fp, "%-25s", self->dso->name); + return repsep_fprintf(fp, "%-*s", width, self->dso->name); - return fprintf(fp, "%016llx ", (u64)self->ip); + return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); } static struct sort_entry sort_dso = { - .header = "Shared Object ", + .header = "Shared Object", .cmp = sort__dso_cmp, .print = sort__dso_print, + .width = &dsos__col_width, }; /* --sort symbol */ @@ -648,22 +698,23 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__sym_print(FILE *fp, struct hist_entry *self) +sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) { size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (u64)self->ip); + ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { - ret += fprintf(fp, "[%c] %s", + ret += repsep_fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : self->dso == hypervisor_dso ? 
'h' : '.', self->sym->name); if (self->sym->module) - ret += fprintf(fp, "\t[%s]", self->sym->module->name); + ret += repsep_fprintf(fp, "\t[%s]", + self->sym->module->name); } else { - ret += fprintf(fp, "%#016llx", (u64)self->ip); + ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -690,19 +741,19 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__parent_print(FILE *fp, struct hist_entry *self) +sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) { - size_t ret = 0; - - ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); - - return ret; + return repsep_fprintf(fp, "%-*s", width, + self->parent ? self->parent->name : "[other]"); } +static unsigned int parent_symbol__col_width; + static struct sort_entry sort_parent = { - .header = "Parent symbol ", + .header = "Parent symbol", .cmp = sort__parent_cmp, .print = sort__parent_print, + .width = &parent_symbol__col_width, }; static int sort__need_collapse = 0; @@ -967,17 +1018,18 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) return 0; if (total_samples) - ret = percent_color_fprintf(fp, " %6.2f%%", - (self->count * 100.0) / total_samples); + ret = percent_color_fprintf(fp, + field_sep ? "%.2f" : " %6.2f%%", + (self->count * 100.0) / total_samples); else - ret = fprintf(fp, "%12Ld ", self->count); + ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) continue; - fprintf(fp, " "); - ret += se->print(fp, self); + fprintf(fp, "%s", field_sep ?: " "); + ret += se->print(fp, self, se->width ? *se->width : 0); } ret += fprintf(fp, "\n"); @@ -992,6 +1044,18 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) * */ +static void dso__calc_col_width(struct dso *self) +{ + if (!col_width_list_str && !field_sep && + (!dso_list || strlist__has_entry(dso_list, self->name))) { + unsigned int slen = strlen(self->name); + if (slen > dsos__col_width) + dsos__col_width = slen; + } + + self->slen_calculated = 1; +} + static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, struct dso **dsop, u64 *ipp) @@ -1011,6 +1075,14 @@ resolve_symbol(struct thread *thread, struct map **mapp, map = thread__find_map(thread, ip); if (map != NULL) { + /* + * We have to do this here as we may have a dso + * with no symbol hit that has a name longer than + * the ones with symbols sampled. 
+ */ + if (!map->dso->slen_calculated) + dso__calc_col_width(map->dso); + if (mapp) *mapp = map; got_map: @@ -1282,6 +1354,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) struct sort_entry *se; struct rb_node *nd; size_t ret = 0; + unsigned int width; + char *col_width = col_width_list_str; fprintf(fp, "\n"); fprintf(fp, "#\n"); @@ -1292,10 +1366,29 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) list_for_each_entry(se, &hist_entry__sort_list, list) { if (exclude_other && (se == &sort_parent)) continue; - fprintf(fp, " %s", se->header); + if (field_sep) { + fprintf(fp, "%c%s", *field_sep, se->header); + continue; + } + width = strlen(se->header); + if (se->width) { + if (col_width_list_str) { + if (col_width) { + *se->width = atoi(col_width); + col_width = strchr(col_width, ','); + if (col_width) + ++col_width; + } + } + width = *se->width = max(*se->width, width); + } + fprintf(fp, " %*s", width, se->header); } fprintf(fp, "\n"); + if (field_sep) + goto print_entries; + fprintf(fp, "# ........"); list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; @@ -1304,13 +1397,18 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) continue; fprintf(fp, " "); - for (i = 0; i < strlen(se->header); i++) + if (se->width) + width = *se->width; + else + width = strlen(se->header); + for (i = 0; i < width; i++) fprintf(fp, "."); } fprintf(fp, "\n"); fprintf(fp, "#\n"); +print_entries: for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct hist_entry, rb_node); ret += hist_entry__fprintf(fp, pos, total_samples); @@ -1900,6 +1998,12 @@ static const struct option options[] = { "only consider symbols in these comms"), OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_STRING('w', "column-widths", &col_width_list_str, + "width[,width...]", + "don't try to adjust column width, use these fixed values"), + OPT_STRING('t', "field-separator", &field_sep, "separator", + "separator for columns, no spaces will be added between " + "columns '.' is reserved."), OPT_END() }; @@ -1956,6 +2060,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) setup_list(&comm_list, comm_list_str, "comm"); setup_list(&sym_list, sym_list_str, "symbol"); + if (field_sep && *field_sep == '.') { + fputs("'.' is the only non valid --field-separator argument\n", + stderr); + exit(129); + } + setup_pager(); return __cmd_report(); diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 99c1b3d1edd..a6b87390cb5 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -18,4 +18,12 @@ (type *)((char *)__mptr - offsetof(type, member)); }) #endif +#ifndef max +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? 
_max1 : _max2; }) +#endif + #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4683b67b5ee..8efe7e41109 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -65,6 +65,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) self->syms = RB_ROOT; self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; + self->slen_calculated = 0; } return self; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 7918cffb23c..2f92b21c712 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -25,6 +25,7 @@ struct dso { struct symbol *(*find_symbol)(struct dso *, u64 ip); unsigned int sym_priv_size; unsigned char adjust_symbols; + unsigned char slen_calculated; char name[0]; }; -- cgit v1.2.3-70-g09d2 From 60c1baf1248e00d423604f018c8af1cf750ad885 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:33 -0300 Subject: perf report: Tidy up reporting of symbols not found Always printing the level info about if it is in the kernel, hypervisor or userspace as that is in the hist_entry. Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 740da43f313..617f4cb7f16 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -705,10 +705,9 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) if (verbose) ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); + ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { - ret += repsep_fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : - self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); + ret += repsep_fprintf(fp, "%s", self->sym->name); if (self->sym->module) ret += repsep_fprintf(fp, "\t[%s]", -- cgit v1.2.3-70-g09d2 From 27d0fd410c3cee00ece2e55f4354a7a9ec1a6a6a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:34 -0300 Subject: strlist: Introduce strlist__entry and strlist__nr_entries methods The strlist__entry method allows accessing strlists like an array, will be used in the 'perf report' to access the first entry. We now keep the nr_entries so that we can check if we have just one entry, will be used in 'perf report' to improve the output by showing just at the top when we have just, say, one DSO. While at it use nr_entries to optimize strlist__is_empty by not using the far more costly rb_first based implementation. 
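
A minimal sketch of how the two new helpers combine (illustrative only, not part of the patch; strlist__new(), strlist__delete() and the str_node 's' member are as used elsewhere in this series):

	#include <stdio.h>
	#include "util/strlist.h"

	static void report_if_single(const char *csv_filter)
	{
		struct strlist *list = strlist__new(true, csv_filter);

		if (list == NULL)
			return;
		/* With exactly one entry the column is redundant: print it once. */
		if (strlist__nr_entries(list) == 1)
			printf("# filter: %s\n", strlist__entry(list, 0)->s);
		strlist__delete(list);
	}
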
Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-2-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/strlist.c | 20 ++++++++++++++++++-- tools/perf/util/strlist.h | 11 +++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 025a78edfff..7ad38171dc2 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -64,6 +64,7 @@ int strlist__add(struct strlist *self, const char *new_entry) rb_link_node(&sn->rb_node, parent, p); rb_insert_color(&sn->rb_node, &self->entries); + ++self->nr_entries; return 0; } @@ -155,8 +156,9 @@ struct strlist *strlist__new(bool dupstr, const char *slist) struct strlist *self = malloc(sizeof(*self)); if (self != NULL) { - self->entries = RB_ROOT; - self->dupstr = dupstr; + self->entries = RB_ROOT; + self->dupstr = dupstr; + self->nr_entries = 0; if (slist && strlist__parse_list(self, slist) != 0) goto out_error; } @@ -182,3 +184,17 @@ void strlist__delete(struct strlist *self) free(self); } } + +struct str_node *strlist__entry(const struct strlist *self, unsigned int idx) +{ + struct rb_node *nd; + + for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + struct str_node *pos = rb_entry(nd, struct str_node, rb_node); + + if (!idx--) + return pos; + } + + return NULL; +} diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 2fdcfee8758..921818e44a5 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -11,7 +11,8 @@ struct str_node { struct strlist { struct rb_root entries; - bool dupstr; + unsigned int nr_entries; + bool dupstr; }; struct strlist *strlist__new(bool dupstr, const char *slist); @@ -21,11 +22,17 @@ void strlist__remove(struct strlist *self, struct str_node *sn); int strlist__load(struct strlist *self, const char *filename); int strlist__add(struct strlist *self, const char *str); +struct str_node *strlist__entry(const struct strlist *self, unsigned int idx); bool strlist__has_entry(struct strlist *self, const char *entry); static inline bool strlist__empty(const struct strlist *self) { - return rb_first(&self->entries) == NULL; + return self->nr_entries == 0; +} + +static inline unsigned int strlist__nr_entries(const struct strlist *self) +{ + return self->nr_entries; } int strlist__parse_list(struct strlist *self, const char *s); -- cgit v1.2.3-70-g09d2 From 021191b35cdfb1b5ee6e78ed5ae010114a40902c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:35 -0300 Subject: perf report: Make the output more compact When we filter by column content we may end up with a column that has the same value for all the lines. So remove that column and tell its unique value on the top, as a comment. Example: [acme@doppio pahole]$ perf report --sort comm,dso,symbol -d ./build/libdwarves.so.1.0.0 -C pahole | head -15 # dso: ./build/libdwarves.so.1.0.0 # comm: pahole # Samples: 58409 # # Overhead Symbol # ........ ...... # 20.93% [.] tag__recode_dwarf_type 14.94% [.] namespace__recode_dwarf_types 10.38% [.] cu__table_add_tag 6.69% [.] __die__process_tag 5.05% [.] die__process_function 4.70% [.] list__for_all_tags 3.68% [.] tag__init 3.48% [.] 
die__create_new_parameter [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-3-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 617f4cb7f16..f3422121d85 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -583,6 +583,7 @@ struct sort_entry { int64_t (*collapse)(struct hist_entry *, struct hist_entry *); size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); unsigned int *width; + bool elide; }; static int64_t cmp_null(void *l, void *r) @@ -1024,7 +1025,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { - if (exclude_other && (se == &sort_parent)) + if (se->elide) continue; fprintf(fp, "%s", field_sep ?: " "); @@ -1079,7 +1080,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, * with no symbol hit that has a name longer than * the ones with symbols sampled. */ - if (!map->dso->slen_calculated) + if (!sort_dso.elide && !map->dso->slen_calculated) dso__calc_col_width(map->dso); if (mapp) @@ -1356,14 +1357,12 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) unsigned int width; char *col_width = col_width_list_str; - fprintf(fp, "\n"); - fprintf(fp, "#\n"); - fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); + fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); list_for_each_entry(se, &hist_entry__sort_list, list) { - if (exclude_other && (se == &sort_parent)) + if (se->elide) continue; if (field_sep) { fprintf(fp, "%c%s", *field_sep, se->header); @@ -1392,7 +1391,7 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; - if (exclude_other && (se == &sort_parent)) + if (se->elide) continue; fprintf(fp, " "); @@ -2022,7 +2021,8 @@ static void setup_sorting(void) } static void setup_list(struct strlist **list, const char *list_str, - const char *list_name) + struct sort_entry *se, const char *list_name, + FILE *fp) { if (list_str) { *list = strlist__new(true, list_str); @@ -2031,6 +2031,11 @@ static void setup_list(struct strlist **list, const char *list_str, list_name); exit(129); } + if (strlist__nr_entries(*list) == 1) { + fprintf(fp, "# %s: %s\n", list_name, + strlist__entry(*list, 0)->s); + se->elide = true; + } } } @@ -2044,9 +2049,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) setup_sorting(); - if (parent_pattern != default_parent_pattern) + if (parent_pattern != default_parent_pattern) { sort_dimension__add("parent"); - else + sort_parent.elide = 1; + } else exclude_other = 0; /* @@ -2055,9 +2061,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); - setup_list(&dso_list, dso_list_str, "dso"); - setup_list(&comm_list, comm_list_str, "comm"); - setup_list(&sym_list, sym_list_str, "symbol"); + setup_pager(); + + setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout); + setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout); + setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout); if (field_sep && *field_sep == '.') { fputs("'.' 
is the only non valid --field-separator argument\n", @@ -2065,7 +2073,5 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) exit(129); } - setup_pager(); - return __cmd_report(); } -- cgit v1.2.3-70-g09d2 From a25e46c46311316cd1b3f27f8bb036df1693c032 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:36 -0300 Subject: perf_counter tools: PLT info is stripped in -debuginfo packages So we need to get the richer .symtab from the debuginfo packages but the PLT info from the original DSO where we have just the leaner .dynsym symtab. Example: | [acme@doppio pahole]$ perf report --sort comm,dso,symbol > before | [acme@doppio pahole]$ perf report --sort comm,dso,symbol > after | [acme@doppio pahole]$ diff -U1 before after | --- before 2009-07-11 11:04:22.688595741 -0300 | +++ after 2009-07-11 11:04:33.380595676 -0300 | @@ -80,3 +80,2 @@ | 0.07% pahole ./build/pahole [.] pahole_stealer | - 0.06% pahole /usr/lib64/libdw-0.141.so [.] 0x00000000007140 | 0.06% pahole /usr/lib64/libdw-0.141.so [.] __libdw_getabbrev | @@ -91,2 +90,3 @@ | 0.06% pahole [kernel] [k] free_hot_cold_page | + 0.06% pahole /usr/lib64/libdw-0.141.so [.] tfind@plt | 0.05% pahole ./build/libdwarves.so.1.0.0 [.] ftype__add_parameter | @@ -242,2 +242,3 @@ | 0.01% pahole [kernel] [k] account_group_user_time | + 0.01% pahole /usr/lib64/libdw-0.141.so [.] strlen@plt | 0.01% pahole ./build/pahole [.] strcmp@plt | [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-4-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 116 ++++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8efe7e41109..f40266b4845 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -374,36 +374,61 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, idx < nr_entries; \ ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) -static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, - GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym, - GElf_Shdr *shdr_dynsym, - size_t dynsym_idx, int verbose) +/* + * We need to check if we have a .dynsym, so that we can handle the + * .plt, synthesizing its symbols, that aren't on the symtabs (be it + * .dynsym or .symtab). + * And always look at the original dso, not at debuginfo packages, that + * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
+ */ +static int dso__synthesize_plt_symbols(struct dso *self, int verbose) { uint32_t nr_rel_entries, idx; GElf_Sym sym; u64 plt_offset; GElf_Shdr shdr_plt; struct symbol *f; - GElf_Shdr shdr_rel_plt; + GElf_Shdr shdr_rel_plt, shdr_dynsym; Elf_Data *reldata, *syms, *symstrs; - Elf_Scn *scn_plt_rel, *scn_symstrs; + Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; + size_t dynsym_idx; + GElf_Ehdr ehdr; char sympltname[1024]; - int nr = 0, symidx; + Elf *elf; + int nr = 0, symidx, fd, err = 0; + + fd = open(self->name, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (elf == NULL) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out_elf_end; + + scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, + ".dynsym", &dynsym_idx); + if (scn_dynsym == NULL) + goto out_elf_end; - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rela.plt", NULL); if (scn_plt_rel == NULL) { - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rel.plt", NULL); if (scn_plt_rel == NULL) - return 0; + goto out_elf_end; } + err = -1; + if (shdr_rel_plt.sh_link != dynsym_idx) - return 0; + goto out_elf_end; - if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL) - return 0; + if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) + goto out_elf_end; /* * Fetch the relocation section to find the indexes to the GOT @@ -411,19 +436,19 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, */ reldata = elf_getdata(scn_plt_rel, NULL); if (reldata == NULL) - return -1; + goto out_elf_end; syms = elf_getdata(scn_dynsym, NULL); if (syms == NULL) - return -1; + goto out_elf_end; - scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link); + scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); if (scn_symstrs == NULL) - return -1; + goto out_elf_end; symstrs = elf_getdata(scn_symstrs, NULL); if (symstrs == NULL) - return -1; + goto out_elf_end; nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; @@ -442,7 +467,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, f = symbol__new(plt_offset, shdr_plt.sh_entsize, sympltname, self->sym_priv_size, 0, verbose); if (!f) - return -1; + goto out_elf_end; dso__insert_symbol(self, f); ++nr; @@ -460,19 +485,25 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, f = symbol__new(plt_offset, shdr_plt.sh_entsize, sympltname, self->sym_priv_size, 0, verbose); if (!f) - return -1; + goto out_elf_end; dso__insert_symbol(self, f); ++nr; } - } else { - /* - * TODO: There are still one more shdr_rel_plt.sh_type - * I have to investigate, but probably should be ignored. 
- */ } - return nr; + err = 0; +out_elf_end: + elf_end(elf); +out_close: + close(fd); + + if (err == 0) + return nr; +out: + fprintf(stderr, "%s: problems reading %s PLT info.\n", + __func__, self->name); + return 0; } static int dso__load_sym(struct dso *self, int fd, const char *name, @@ -486,9 +517,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Shdr shdr; Elf_Data *syms; GElf_Sym sym; - Elf_Scn *sec, *sec_dynsym, *sec_strndx; + Elf_Scn *sec, *sec_strndx; Elf *elf; - size_t dynsym_idx; int nr = 0; elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); @@ -505,32 +535,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, goto out_elf_end; } - /* - * We need to check if we have a .dynsym, so that we can handle the - * .plt, synthesizing its symbols, that aren't on the symtabs (be it - * .dynsym or .symtab) - */ - sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr, - ".dynsym", &dynsym_idx); - if (sec_dynsym != NULL) { - nr = dso__synthesize_plt_symbols(self, elf, &ehdr, - sec_dynsym, &shdr, - dynsym_idx, verbose); - if (nr < 0) - goto out_elf_end; - } - - /* - * But if we have a full .symtab (that is a superset of .dynsym) we - * should add the symbols not in the .dynsyn - */ sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); if (sec == NULL) { - if (sec_dynsym == NULL) + sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); + if (sec == NULL) goto out_elf_end; - - sec = sec_dynsym; - gelf_getshdr(sec, &shdr); } syms = elf_getdata(sec, NULL); @@ -669,6 +678,11 @@ more: if (!ret) goto more; + if (ret > 0) { + int nr_plt = dso__synthesize_plt_symbols(self, verbose); + if (nr_plt > 0) + ret += nr_plt; + } out: free(name); return ret; -- cgit v1.2.3-70-g09d2 From e3d7e183dc276df2fcaf02af173a49ad119ba9f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:37 -0300 Subject: perf report: Introduce -n/--show-nr-samples [acme@doppio pahole]$ perf report -ns comm,dso,symbol -d /lib64/libc-2.10.1.so -C pahole | head -17 21.94% 32101 [.] _int_malloc 20.10% 29402 [.] __GI_strcmp 16.77% 24533 [.] __tsearch 12.61% 18450 [.] malloc_consolidate 6.42% 9394 [.] _int_free 6.28% 9191 [.] __tfind 4.56% 6678 [.] __GI___libc_free 4.46% 6520 [.] _IO_vfprintf_internal 2.59% 3786 [.] __malloc 1.17% 1716 [.] __GI_memcpy [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-5-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-report.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 05774dfbd14..e72e9311078 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,6 +24,9 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. +-n +--show-nr-samples + Show the number of samples for each symbol -C:: --comms=:: Only consider symbols in these comms. 
CSV that understands diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f3422121d85..430a195b858 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -51,6 +51,7 @@ static int verbose; static int modules; static int full_paths; +static int show_nr_samples; static unsigned long page_size; static unsigned long mmap_window = 32; @@ -1024,6 +1025,13 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) else ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); + if (show_nr_samples) { + if (field_sep) + fprintf(fp, "%c%lld", *field_sep, self->count); + else + fprintf(fp, "%11lld", self->count); + } + list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) continue; @@ -1361,6 +1369,12 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); + if (show_nr_samples) { + if (field_sep) + fprintf(fp, "%cSamples", *field_sep); + else + fputs(" Samples ", fp); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) continue; @@ -1388,6 +1402,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) goto print_entries; fprintf(fp, "# ........"); + if (show_nr_samples) + fprintf(fp, " .........."); list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; @@ -1979,6 +1995,8 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), + OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, + "Show a column with the number of samples"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, -- cgit v1.2.3-70-g09d2 From 23cdb5d5171d591ec911aada682e09d53c14a810 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 13 Jul 2009 02:25:47 +0200 Subject: perf_counter tools: Fix index boundary check Keep index within event_type_descriptors[] Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4A5A7F0B.4070106@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 518a33affe1..d18c98edd00 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -436,7 +436,7 @@ void print_events(void) for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { type = syms->type + 1; - if (type > ARRAY_SIZE(event_type_descriptors)) + if (type >= ARRAY_SIZE(event_type_descriptors)) type = 0; if (type != prev_type) -- cgit v1.2.3-70-g09d2 From 11b5f81e1b0ea0bc84fe32f0a27054e052b2bf84 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: perf_counter: Synthesize VDSO mmap event perf record synthesizes mmap events for the running process. Right now it just catches file mappings, but we can check for the vdso symbol and add that too. 
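
The change itself is a two-line check in pid_synthesize_mmap_samples(); a stand-alone sketch of the idea follows (the function name and the crude parsing are illustrative only -- the real code also checks the vm_exec bit before looking at the mapping name):

	#include <stdio.h>
	#include <string.h>

	/* Walk a /proc/<pid>/maps stream and pick out the mappings that
	 * deserve a synthesized mmap event: file-backed ones, and now the
	 * vdso too. */
	static void synth_mmap_sketch(FILE *maps)
	{
		char line[4096];

		while (fgets(line, sizeof(line), maps)) {
			char *name = strchr(line, '/');	/* file-backed mapping */

			if (name == NULL)
				name = strstr(line, "[vdso]"); /* vdso has no path */
			if (name == NULL)
				continue;	/* heap, stack, anon: skip */
			/* ...fill in and emit a PERF_EVENT_MMAP record for 'name'... */
		}
	}
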
Signed-off-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <20090716104817.517264409@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ef78a5e6f3..072aaf0369f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -313,6 +313,10 @@ static void pid_synthesize_mmap_samples(pid_t pid) if (*pbf == 'x') { /* vm_exec */ char *execname = strchr(bf, '/'); + /* Catch VDSO */ + if (execname == NULL) + execname = strstr(bf, "[vdso]"); + if (execname == NULL) continue; -- cgit v1.2.3-70-g09d2 From 4bba828dd9bb950ad1fe340ef148a5436a10f131 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: perf_counter: Add perf record option to log addresses Add the -d or --data option to log event addresses (eg page faults). Signed-off-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <20090716104817.697698033@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 072aaf0369f..68a9be0d151 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -43,6 +43,7 @@ static int call_graph = 0; static int verbose = 0; static int inherit_stat = 0; static int no_samples = 0; +static int sample_address = 0; static long samples; static struct timeval last_read; @@ -405,6 +406,9 @@ static void create_counter(int counter, int cpu, pid_t pid) if (inherit_stat) attr->inherit_stat = 1; + if (sample_address) + attr->sample_type |= PERF_SAMPLE_ADDR; + if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; @@ -649,6 +653,8 @@ static const struct option options[] = { "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('s', "stat", &inherit_stat, "per thread counts"), + OPT_BOOLEAN('d', "data", &sample_address, + "Sample addresses"), OPT_BOOLEAN('n', "no-samples", &no_samples, "don't sample"), OPT_END() -- cgit v1.2.3-70-g09d2 From 1483b19f8f5e8ad0c8816de368b099322dad4db5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: perf_counter: Make call graph option consistent perf record uses -g for logging call graph data but perf report uses -c to print call graph data. Be consistent and use -g everywhere for call graph data. Also update the help text to reflect the current default - fractal,0.5 Signed-off-by: Anton Blanchard Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <20090716104817.803604373@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4e5cc266311..4b980cce705 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1891,9 +1891,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type,min_percent", + OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", "Display callchains using output_type and min percent threshold. 
" - "Default: flat,0", &parse_callchain_opt, callchain_default_opt), + "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", -- cgit v1.2.3-70-g09d2 From 7f453c24b95a085fc7bd35d53b33abc4dc5a048b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Jul 2009 13:19:40 +0200 Subject: perf_counter: PERF_SAMPLE_ID and inherited counters Anton noted that for inherited counters the counter-id as provided by PERF_SAMPLE_ID isn't mappable to the id found through PERF_RECORD_ID because each inherited counter gets its own id. His suggestion was to always return the parent counter id, since that is the primary counter id as exposed. However, these inherited counters have a unique identifier so that events like PERF_EVENT_PERIOD and PERF_EVENT_THROTTLE can be specific about which counter gets modified, which is important when trying to normalize the sample streams. This patch removes PERF_EVENT_PERIOD in favour of PERF_SAMPLE_PERIOD, which is more useful anyway, since changing periods became a lot more common than initially thought -- rendering PERF_EVENT_PERIOD the less useful solution (also, PERF_SAMPLE_PERIOD reports the more accurate value, since it reports the value used to trigger the overflow, whereas PERF_EVENT_PERIOD simply reports the requested period changed, which might only take effect on the next cycle). This still leaves us PERF_EVENT_THROTTLE to consider, but since that _should_ be a rare occurrence, and linking it to a primary id is the most useful bit to diagnose the problem, we introduce a PERF_SAMPLE_STREAM_ID, for those few cases where the full reconstruction is important. 
[Does change the ABI a little, but I see no other way out] Suggested-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <1248095846.15751.8781.camel@twins> --- include/linux/perf_counter.h | 15 ++----- kernel/perf_counter.c | 92 +++++++++++++++---------------------------- tools/perf/builtin-annotate.c | 24 ----------- tools/perf/builtin-report.c | 24 ----------- 4 files changed, 35 insertions(+), 120 deletions(-) (limited to 'tools') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5e970c7d3fd..bd15d7a5f5c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -120,8 +120,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ }; /* @@ -312,16 +313,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 time; * u64 id; - * u64 sample_period; - * }; - */ - PERF_EVENT_PERIOD = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; + * u64 stream_id; * }; */ PERF_EVENT_THROTTLE = 5, @@ -356,6 +348,7 @@ enum perf_event_type { * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e1d6a3aa133..7530588fa5c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -154,6 +154,20 @@ static void unclone_ctx(struct perf_counter_context *ctx) } } +/* + * If we inherit counters we want to return the parent counter id + * to userspace. + */ +static u64 primary_counter_id(struct perf_counter *counter) +{ + u64 id = counter->id; + + if (counter->parent) + id = counter->parent->id; + + return id; +} + /* * Get the perf_counter_context for a task and lock it. 
* This has to cope with with the fact that until it is locked, @@ -1296,7 +1310,6 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_counter *counter, int enable); -static void perf_log_period(struct perf_counter *counter, u64 period); static void perf_adjust_period(struct perf_counter *counter, u64 events) { @@ -1315,8 +1328,6 @@ static void perf_adjust_period(struct perf_counter *counter, u64 events) if (!sample_period) sample_period = 1; - perf_log_period(counter, sample_period); - hwc->sample_period = sample_period; } @@ -1705,7 +1716,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) values[n++] = counter->total_time_running + atomic64_read(&counter->child_total_time_running); if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = counter->id; + values[n++] = primary_counter_id(counter); mutex_unlock(&counter->child_mutex); if (count < n * sizeof(u64)) @@ -1812,8 +1823,6 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) counter->attr.sample_freq = value; } else { - perf_log_period(counter, value); - counter->attr.sample_period = value; counter->hw.sample_period = value; } @@ -2662,6 +2671,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_ID) header.size += sizeof(u64); + if (sample_type & PERF_SAMPLE_STREAM_ID) + header.size += sizeof(u64); + if (sample_type & PERF_SAMPLE_CPU) { header.size += sizeof(cpu_entry); @@ -2705,7 +2717,13 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(&handle, data->addr); - if (sample_type & PERF_SAMPLE_ID) + if (sample_type & PERF_SAMPLE_ID) { + u64 id = primary_counter_id(counter); + + perf_output_put(&handle, id); + } + + if (sample_type & PERF_SAMPLE_STREAM_ID) perf_output_put(&handle, counter->id); if (sample_type & PERF_SAMPLE_CPU) @@ -2728,7 +2746,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sub != counter) sub->pmu->read(sub); - group_entry.id = sub->id; + group_entry.id = primary_counter_id(sub); group_entry.counter = atomic64_read(&sub->count); perf_output_put(&handle, group_entry); @@ -2788,15 +2806,8 @@ perf_counter_read_event(struct perf_counter *counter, } if (counter->attr.read_format & PERF_FORMAT_ID) { - u64 id; - event.header.size += sizeof(u64); - if (counter->parent) - id = counter->parent->id; - else - id = counter->id; - - event.format[i++] = id; + event.format[i++] = primary_counter_id(counter); } ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); @@ -3190,49 +3201,6 @@ void __perf_counter_mmap(struct vm_area_struct *vma) perf_counter_mmap_event(&mmap_event); } -/* - * Log sample_period changes so that analyzing tools can re-normalize the - * event flow. 
- */ - -struct freq_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 period; -}; - -static void perf_log_period(struct perf_counter *counter, u64 period) -{ - struct perf_output_handle handle; - struct freq_event event; - int ret; - - if (counter->hw.sample_period == period) - return; - - if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) - return; - - event = (struct freq_event) { - .header = { - .type = PERF_EVENT_PERIOD, - .misc = 0, - .size = sizeof(event), - }, - .time = sched_clock(), - .id = counter->id, - .period = period, - }; - - ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, event); - perf_output_end(&handle); -} - /* * IRQ throttle logging */ @@ -3246,14 +3214,16 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) struct perf_event_header header; u64 time; u64 id; + u64 stream_id; } throttle_event = { .header = { .type = PERF_EVENT_THROTTLE + 1, .misc = 0, .size = sizeof(throttle_event), }, - .time = sched_clock(), - .id = counter->id, + .time = sched_clock(), + .id = primary_counter_id(counter), + .stream_id = counter->id, }; ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5f9eefecc57..1dba568e194 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -74,20 +74,12 @@ struct fork_event { u32 pid, ppid; }; -struct period_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 sample_period; -}; - typedef union event_union { struct perf_event_header header; struct ip_event ip; struct mmap_event mmap; struct comm_event comm; struct fork_event fork; - struct period_event period; } event_t; @@ -997,19 +989,6 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - static int process_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1025,9 +1004,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_FORK: return process_fork_event(event, offset, head); - - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); /* * We dont process them right now but they are fine: */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a118bc77286..b20a4b6e31b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -101,13 +101,6 @@ struct fork_event { u32 pid, ppid; }; -struct period_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 sample_period; -}; - struct lost_event { struct perf_event_header header; u64 id; @@ -127,7 +120,6 @@ typedef union event_union { struct mmap_event mmap; struct comm_event comm; struct fork_event fork; - struct period_event period; struct lost_event lost; struct read_event read; } event_t; @@ -1635,19 +1627,6 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void 
*)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1729,9 +1708,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_FORK: return process_fork_event(event, offset, head); - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); - case PERF_EVENT_LOST: return process_lost_event(event, offset, head); -- cgit v1.2.3-70-g09d2 From a0541234f89c93f313961ce7b28676e11488a5f0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 22 Jul 2009 23:04:12 +1000 Subject: perf_counter: Improve perf stat and perf record option parsing perf stat and perf record currently look for all options on the command line. This can lead to some confusion: # perf stat ls -l Error: unknown switch `l' While we can work around this by adding '--' before the command, the git option parsing code can stop at the first non option: # perf stat ls -l Performance counter stats for 'ls -l': .... Signed-off-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <20090722130412.GD9029@kryten> --- tools/perf/builtin-record.c | 3 ++- tools/perf/builtin-stat.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 68a9be0d151..6da09928130 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -664,7 +664,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; - argc = parse_options(argc, argv, options, record_usage, 0); + argc = parse_options(argc, argv, options, record_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target_pid == -1 && !system_wide) usage_with_options(record_usage, options); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 27921a8ce1a..f9510eeeb6c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -511,7 +511,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; - argc = parse_options(argc, argv, options, stat_usage, 0); + argc = parse_options(argc, argv, options, stat_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(stat_usage, options); if (run_count <= 0 || run_count > MAX_RUN) -- cgit v1.2.3-70-g09d2 From dfe5a50461db90fab901cb697eff0d3d2e9fd229 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 20 Jul 2009 22:54:26 -0700 Subject: perf: avoid structure size confusion by using a fixed size for some reason, this structure gets compiled as 36 bytes in some files (the ones that alloacte it) but 40 bytes in others (the ones that use it). The cause is an off_t type that gets a different size in different compilation units for some yet-to-be-explained reason. But the effect is disasterous; the size/offset members of the struct are at different offsets, and result in mostly complete garbage. The parser in perf is so robust that this all gets hidden, and after skipping an certain amount of samples, it recovers.... so this bug is not normally noticed. .... except when you want every sample to be exact. Fix this by just using an explicitly sized type. 
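
The report above leaves the exact cause open; one well-known way to reproduce this class of mismatch is _FILE_OFFSET_BITS being defined in some translation units but not others on a 32-bit build. The snippet below demonstrates the effect (illustrative only, it is not claimed to be what happened here):

	#include <stdio.h>
	#include <sys/types.h>

	int main(void)
	{
		/* Compile once as-is and once with -D_FILE_OFFSET_BITS=64 on a
		 * 32-bit target: off_t changes size, and so does any structure
		 * that embeds it.  A fixed-width s64 cannot drift like this. */
		printf("sizeof(off_t) = %zu\n", sizeof(off_t));
		return 0;
	}
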
Signed-off-by: Arjan van de Ven Signed-off-by: Peter Zijlstra LKML-Reference: <4A655917.9080504@linux.intel.com> --- tools/perf/util/header.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index b5ef53ad4c7..bf280449fcf 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -16,7 +16,7 @@ struct perf_header { int frozen; int attrs, size; struct perf_header_attr **attr; - off_t attr_offset; + s64 attr_offset; u64 data_offset; u64 data_size; }; -- cgit v1.2.3-70-g09d2 From 28ac909b49a155856c957d080f8a796b3c1d1f3e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 20 Jul 2009 14:14:12 -0300 Subject: perf symbol: C++ demangling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [acme@doppio ~]$ perf report -s comm,dso,symbol -C firefox -d /usr/lib64/xulrunner-1.9.1/libxul.so | grep :: | head 2.21% [.] nsDeque::Push(void*) 1.78% [.] GraphWalker::DoWalk(nsDeque&) 1.30% [.] GCGraphBuilder::AddNode(void*, nsCycleCollectionParticipant*) 1.27% [.] XPCWrappedNative::CallMethod(XPCCallContext&, XPCWrappedNative::CallMode) 1.18% [.] imgContainer::DrawFrameTo(gfxIImageFrame*, gfxIImageFrame*, nsRect&) 1.13% [.] nsDeque::PopFront() 1.11% [.] nsGlobalWindow::RunTimeout(nsTimeout*) 0.97% [.] nsXPConnect::Traverse(void*, nsCycleCollectionTraversalCallback&) 0.95% [.] nsJSEventListener::cycleCollection::Traverse(void*, nsCycleCollectionTraversalCallback&) 0.95% [.] nsCOMPtr_base::~nsCOMPtr_base() [acme@doppio ~]$ Cc: Pekka Enberg Cc: Vegard Nossum Cc: Paul Mackerras Cc: Frédéric Weisbecker Suggested-by: Clark Williams Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Peter Zijlstra LKML-Reference: <20090720171412.GB10410@ghostprotocols.net> --- tools/perf/Makefile | 2 +- tools/perf/util/symbol.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7822b3d6bac..a5e9b876ca0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -345,7 +345,7 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) -EXTLIBS = +EXTLIBS = -lbfd # # Platform specific tweaks diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f40266b4845..98aee922acc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -6,9 +6,15 @@ #include #include #include +#include const char *sym_hist_filter; +#ifndef DMGL_PARAMS +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#endif + static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, u64 obj_start, int verbose) @@ -571,6 +577,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, NULL) != NULL); elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; + const char *name; + char *demangled; u64 obj_start; struct section *section = NULL; int is_label = elf_sym__is_label(&sym); @@ -609,10 +617,19 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, goto out_elf_end; } } + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... 
+ */ + name = elf_sym__name(&sym, symstrs); + demangled = bfd_demangle(NULL, name, DMGL_PARAMS | DMGL_ANSI); + if (demangled != NULL) + name = demangled; - f = symbol__new(sym.st_value, sym.st_size, - elf_sym__name(&sym, symstrs), + f = symbol__new(sym.st_value, sym.st_size, name, self->sym_priv_size, obj_start, verbose); + free(demangled); if (!f) goto out_elf_end; -- cgit v1.2.3-70-g09d2 From f6bdafef2ab911f03321fa83d8da1df99878009e Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 21 Jul 2009 12:20:22 -0400 Subject: perf_counter: Add tracepoint support to perf list, perf stat Add support to 'perf list' and 'perf stat' for kernel tracepoints. The implementation creates a 'for_each_subsystem' and 'for_each_event' for easy iteration over the tracepoints. Signed-off-by: Jason Baron Signed-off-by: Peter Zijlstra LKML-Reference: <426129bf9fcc8ee63bb094cf736e7316a7dcd77a.1248190728.git.jbaron@redhat.com> --- tools/perf/util/parse-events.c | 175 ++++++++++++++++++++++++++++++++++++++++- tools/perf/util/util.h | 2 + 2 files changed, 176 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d18c98edd00..5a3cd3a34af 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -12,6 +12,8 @@ int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; +static char default_debugfs_path[] = "/sys/kernel/debug/tracing/events"; + struct event_symbol { u8 type; u64 config; @@ -110,6 +112,88 @@ static unsigned long hw_cache_stat[C(MAX)] = { [C(BPU)] = (CACHE_READ), }; +#define for_each_subsystem(sys_dir, sys_dirent, sys_next, file, st) \ + while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ + if (snprintf(file, MAXPATHLEN, "%s/%s", default_debugfs_path, \ + sys_dirent.d_name) && \ + (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ + (strcmp(sys_dirent.d_name, ".")) && \ + (strcmp(sys_dirent.d_name, ".."))) + +#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ + while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ + if (snprintf(file, MAXPATHLEN, "%s/%s/%s", default_debugfs_path, \ + sys_dirent.d_name, evt_dirent.d_name) && \ + (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ + (strcmp(evt_dirent.d_name, ".")) && \ + (strcmp(evt_dirent.d_name, ".."))) + +#define MAX_EVENT_LENGTH 30 + +static int valid_debugfs_mount(void) +{ + struct statfs st_fs; + + if (statfs(default_debugfs_path, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) + return -ENOENT; + return 0; +} + +static char *tracepoint_id_to_name(u64 config) +{ + static char tracepoint_name[2 * MAX_EVENT_LENGTH]; + DIR *sys_dir, *evt_dir; + struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct stat st; + char id_buf[4]; + int fd; + u64 id; + char evt_path[MAXPATHLEN]; + + if (valid_debugfs_mount()) + return "unkown"; + + sys_dir = opendir(default_debugfs_path); + if (!sys_dir) + goto cleanup; + + for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { + evt_dir = opendir(evt_path); + if (!evt_dir) + goto cleanup; + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, + evt_path, st) { + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", + default_debugfs_path, sys_dirent.d_name, + evt_dirent.d_name); + fd = open(evt_path, O_RDONLY); + if (fd < 0) + continue; + if (read(fd, id_buf, sizeof(id_buf)) < 0) { + close(fd); + continue; + } + close(fd); + id = atoll(id_buf); + if (id == config) { + closedir(evt_dir); + closedir(sys_dir); + 
snprintf(tracepoint_name, 2 * MAX_EVENT_LENGTH, + "%s:%s", sys_dirent.d_name, + evt_dirent.d_name); + return tracepoint_name; + } + } + closedir(evt_dir); + } + +cleanup: + closedir(sys_dir); + return "unkown"; +} + static int is_cache_op_valid(u8 cache_type, u8 cache_op) { if (hw_cache_stat[cache_type] & COP(cache_op)) @@ -177,6 +261,9 @@ char *event_name(int counter) return sw_event_names[config]; return "unknown-software"; + case PERF_TYPE_TRACEPOINT: + return tracepoint_id_to_name(config); + default: break; } @@ -265,6 +352,53 @@ parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) return 1; } +static int parse_tracepoint_event(const char **strp, + struct perf_counter_attr *attr) +{ + const char *evt_name; + char sys_name[MAX_EVENT_LENGTH]; + char id_buf[4]; + int fd; + unsigned int sys_length, evt_length; + u64 id; + char evt_path[MAXPATHLEN]; + + if (valid_debugfs_mount()) + return 0; + + evt_name = strchr(*strp, ':'); + if (!evt_name) + return 0; + + sys_length = evt_name - *strp; + if (sys_length >= MAX_EVENT_LENGTH) + return 0; + + strncpy(sys_name, *strp, sys_length); + sys_name[sys_length] = '\0'; + evt_name = evt_name + 1; + evt_length = strlen(evt_name); + if (evt_length >= MAX_EVENT_LENGTH) + return 0; + + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", default_debugfs_path, + sys_name, evt_name); + fd = open(evt_path, O_RDONLY); + if (fd < 0) + return 0; + + if (read(fd, id_buf, sizeof(id_buf)) < 0) { + close(fd); + return 0; + } + close(fd); + id = atoll(id_buf); + attr->config = id; + attr->type = PERF_TYPE_TRACEPOINT; + *strp = evt_name + evt_length; + return 1; +} + static int check_events(const char *str, unsigned int i) { int n; @@ -374,7 +508,8 @@ parse_event_modifier(const char **strp, struct perf_counter_attr *attr) */ static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) { - if (!(parse_raw_event(str, attr) || + if (!(parse_tracepoint_event(str, attr) || + parse_raw_event(str, attr) || parse_numeric_event(str, attr) || parse_symbolic_event(str, attr) || parse_generic_hw_event(str, attr))) @@ -422,6 +557,42 @@ static const char * const event_type_descriptors[] = { "Hardware cache event", }; +/* + * Print the events from /tracing/events + */ + +static void print_tracepoint_events(void) +{ + DIR *sys_dir, *evt_dir; + struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct stat st; + char evt_path[MAXPATHLEN]; + + if (valid_debugfs_mount()) + return; + + sys_dir = opendir(default_debugfs_path); + if (!sys_dir) + goto cleanup; + + for_each_subsystem(sys_dir, sys_dirent, sys_next, evt_path, st) { + evt_dir = opendir(evt_path); + if (!evt_dir) + goto cleanup; + for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, + evt_path, st) { + snprintf(evt_path, MAXPATHLEN, "%s:%s", + sys_dirent.d_name, evt_dirent.d_name); + fprintf(stderr, " %-40s [%s]\n", evt_path, + event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); + } + closedir(evt_dir); + } + +cleanup: + closedir(sys_dir); +} + /* * Print the help text for the event symbols: */ @@ -472,5 +643,7 @@ void print_events(void) "rNNN"); fprintf(stderr, "\n"); + print_tracepoint_events(); + exit(129); } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b4be6071c10..68fe157d72f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,7 @@ #include #include #include +#include "../../../include/linux/magic.h" #ifndef NO_ICONV #include -- cgit v1.2.3-70-g09d2 
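
With this in place an event such as sched:sched_switch (an example name only) can be counted directly, e.g. 'perf stat -e sched:sched_switch'. A stripped-down sketch of what parse_tracepoint_event() boils down to, with error handling trimmed and debugfs assumed at its default mount point:

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Resolve "<sys>:<event>" to the numeric tracepoint id the kernel
	 * expects in attr->config for PERF_TYPE_TRACEPOINT counters. */
	static long long tracepoint_id(const char *sys, const char *event)
	{
		char path[256], buf[16];
		ssize_t n;
		int fd;

		snprintf(path, sizeof(path),
			 "/sys/kernel/debug/tracing/events/%s/%s/id", sys, event);
		fd = open(path, O_RDONLY);
		if (fd < 0)
			return -1;
		n = read(fd, buf, sizeof(buf) - 1);
		close(fd);
		if (n <= 0)
			return -1;
		buf[n] = '\0';
		return atoll(buf);	/* becomes attr->config */
	}
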
From 5beeded123c5befa21f1c6e16219f2a3eb7dd197 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 21 Jul 2009 14:16:29 -0400 Subject: perf_counter: Detect debugfs location If "/sys/kernel/debug" is not a debugfs mount point, search for the debugfs filesystem in /proc/mounts, but also allows the user to specify '--debugfs-dir=blah' or set the environment variable: 'PERF_DEBUGFS_DIR' Signed-off-by: Jason Baron [ also made it probe "/debug" by default ] Signed-off-by: Peter Zijlstra LKML-Reference: <20090721181629.GA3094@redhat.com> --- tools/perf/perf.c | 77 +++++++++++++++++++++++++++++++++++++++++- tools/perf/util/cache.h | 1 + tools/perf/util/parse-events.c | 33 +++++++++--------- tools/perf/util/parse-events.h | 5 +++ tools/perf/util/string.h | 3 ++ 5 files changed, 102 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf.c b/tools/perf/perf.c index c5656784c61..31982ad064b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -12,6 +12,8 @@ #include "util/cache.h" #include "util/quote.h" #include "util/run-command.h" +#include "util/parse-events.h" +#include "util/string.h" const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; @@ -25,6 +27,8 @@ struct pager_config { int val; }; +static char debugfs_mntpt[MAXPATHLEN]; + static int pager_command_config(const char *var, const char *value, void *data) { struct pager_config *c = data; @@ -56,6 +60,15 @@ static void commit_pager_choice(void) { } } +static void set_debugfs_path(void) +{ + char *path; + + path = getenv(PERF_DEBUGFS_ENVIRONMENT); + snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt, + "tracing/events"); +} + static int handle_options(const char*** argv, int* argc, int* envchanged) { int handled = 0; @@ -122,6 +135,22 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--debugfs-dir")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --debugfs-dir.\n"); + usage(perf_usage_string); + } + strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN); + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + } else if (!prefixcmp(cmd, "--debugfs-dir=")) { + strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN); + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; + if (envchanged) + *envchanged = 1; } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -228,6 +257,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (use_pager == -1 && p->option & USE_PAGER) use_pager = 1; commit_pager_choice(); + set_debugfs_path(); status = p->fn(argc, argv, prefix); if (status) @@ -346,6 +376,49 @@ static int run_argv(int *argcp, const char ***argv) return done_alias; } +/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ +static void get_debugfs_mntpt(void) +{ + FILE *file; + char fs_type[100]; + char debugfs[MAXPATHLEN]; + + /* + * try the standard location + */ + if (valid_debugfs_mount("/sys/kernel/debug/") == 0) { + strcpy(debugfs_mntpt, "/sys/kernel/debug/"); + return; + } + + /* + * try the sane location + */ + if (valid_debugfs_mount("/debug/") == 0) { + strcpy(debugfs_mntpt, "/debug/"); + return; + } + + /* + * give up and parse /proc/mounts + */ + file = fopen("/proc/mounts", "r"); + if (file == NULL) + return; + + while (fscanf(file, "%*s %" + STR(MAXPATHLEN) + "s %99s %*s %*d %*d\n", + debugfs, fs_type) == 2) { + if 
(strcmp(fs_type, "debugfs") == 0) + break; + } + fclose(file); + if (strcmp(fs_type, "debugfs") == 0) { + strncpy(debugfs_mntpt, debugfs, MAXPATHLEN); + debugfs_mntpt[MAXPATHLEN - 1] = '\0'; + } +} int main(int argc, const char **argv) { @@ -354,7 +427,8 @@ int main(int argc, const char **argv) cmd = perf_extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; - + /* get debugfs mount point from /proc/mounts */ + get_debugfs_mntpt(); /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * @@ -377,6 +451,7 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); + set_debugfs_path(); if (argc > 0) { if (!prefixcmp(argv[0], "--")) argv[0] += 2; diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 161d5f413e2..4b50c412b9c 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -18,6 +18,7 @@ #define PERFATTRIBUTES_FILE ".perfattributes" #define INFOATTRIBUTES_FILE "info/attributes" #define ATTRIBUTE_MACRO_PREFIX "[attr]" +#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a3cd3a34af..7bdad8df22a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -5,6 +5,7 @@ #include "parse-events.h" #include "exec_cmd.h" #include "string.h" +#include "cache.h" extern char *strcasestr(const char *haystack, const char *needle); @@ -12,8 +13,6 @@ int nr_counters; struct perf_counter_attr attrs[MAX_COUNTERS]; -static char default_debugfs_path[] = "/sys/kernel/debug/tracing/events"; - struct event_symbol { u8 type; u64 config; @@ -21,6 +20,8 @@ struct event_symbol { char *alias; }; +char debugfs_path[MAXPATHLEN]; + #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x @@ -114,27 +115,27 @@ static unsigned long hw_cache_stat[C(MAX)] = { #define for_each_subsystem(sys_dir, sys_dirent, sys_next, file, st) \ while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ - if (snprintf(file, MAXPATHLEN, "%s/%s", default_debugfs_path, \ - sys_dirent.d_name) && \ + if (snprintf(file, MAXPATHLEN, "%s/%s", debugfs_path, \ + sys_dirent.d_name) && \ (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ (strcmp(sys_dirent.d_name, ".")) && \ (strcmp(sys_dirent.d_name, ".."))) #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ - if (snprintf(file, MAXPATHLEN, "%s/%s/%s", default_debugfs_path, \ - sys_dirent.d_name, evt_dirent.d_name) && \ + if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ + sys_dirent.d_name, evt_dirent.d_name) && \ (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ (strcmp(evt_dirent.d_name, ".")) && \ (strcmp(evt_dirent.d_name, ".."))) #define MAX_EVENT_LENGTH 30 -static int valid_debugfs_mount(void) +int valid_debugfs_mount(const char *debugfs) { struct statfs st_fs; - if (statfs(default_debugfs_path, &st_fs) < 0) + if (statfs(debugfs, &st_fs) < 0) return -ENOENT; else if (st_fs.f_type != (long) DEBUGFS_MAGIC) return -ENOENT; @@ -152,10 +153,10 @@ static char *tracepoint_id_to_name(u64 config) u64 id; char evt_path[MAXPATHLEN]; - if (valid_debugfs_mount()) + if (valid_debugfs_mount(debugfs_path)) return "unkown"; - sys_dir = opendir(default_debugfs_path); + sys_dir = 
opendir(debugfs_path); if (!sys_dir) goto cleanup; @@ -166,7 +167,7 @@ static char *tracepoint_id_to_name(u64 config) for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, evt_path, st) { snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", - default_debugfs_path, sys_dirent.d_name, + debugfs_path, sys_dirent.d_name, evt_dirent.d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) @@ -363,7 +364,7 @@ static int parse_tracepoint_event(const char **strp, u64 id; char evt_path[MAXPATHLEN]; - if (valid_debugfs_mount()) + if (valid_debugfs_mount(debugfs_path)) return 0; evt_name = strchr(*strp, ':'); @@ -381,8 +382,8 @@ static int parse_tracepoint_event(const char **strp, if (evt_length >= MAX_EVENT_LENGTH) return 0; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", default_debugfs_path, - sys_name, evt_name); + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + sys_name, evt_name); fd = open(evt_path, O_RDONLY); if (fd < 0) return 0; @@ -568,10 +569,10 @@ static void print_tracepoint_events(void) struct stat st; char evt_path[MAXPATHLEN]; - if (valid_debugfs_mount()) + if (valid_debugfs_mount(debugfs_path)) return; - sys_dir = opendir(default_debugfs_path); + sys_dir = opendir(debugfs_path); if (!sys_dir) goto cleanup; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e3d552908e6..1ea5d09b6eb 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -3,6 +3,8 @@ * Parse symbolic events/counts passed in as options: */ +struct option; + extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; @@ -15,3 +17,6 @@ extern int parse_events(const struct option *opt, const char *str, int unset); extern void print_events(void); +extern char debugfs_path[]; +extern int valid_debugfs_mount(const char *debugfs); + diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 3dca2f654cd..bf39dfadfd2 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -5,4 +5,7 @@ int hex2u64(const char *ptr, u64 *val); +#define _STR(x) #x +#define STR(x) _STR(x) + #endif -- cgit v1.2.3-70-g09d2 From d20ff6bd6bba2e7e6681fa17565347b410c46ab3 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 20 Jul 2009 14:01:38 +0200 Subject: perf_counter tools: Fix vmlinux symbol generation breakage vmlinux meets the criteria for symbol adjustment, which breaks vmlinux generated symbols. Fix this by exempting vmlinux. This is a bit fragile in that someone could change the kernel dso's name, but currently that name is also hardwired. 
Signed-off-by: Mike Galbraith Cc: Ingo Molnar Signed-off-by: Peter Zijlstra LKML-Reference: <1248091298.18702.18.camel@marge.simson.net> --- tools/perf/util/symbol.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 98aee922acc..28106059bf1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -525,7 +525,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Sym sym; Elf_Scn *sec, *sec_strndx; Elf *elf; - int nr = 0; + int nr = 0, kernel = !strcmp("[kernel]", self->name); elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { @@ -571,10 +571,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - self->adjust_symbols = (ehdr.e_type == ET_EXEC || + if (!kernel) { + self->adjust_symbols = (ehdr.e_type == ET_EXEC || elf_section_by_name(elf, &ehdr, &shdr, ".gnu.prelink_undo", NULL) != NULL); + } else self->adjust_symbols = 0; + elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; const char *name; -- cgit v1.2.3-70-g09d2 From 0fdc7e67dd312986e30b861adff48732bd33eb3f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 21 Jul 2009 10:30:36 +0200 Subject: perf_counter tools: Give perf top inherit option Currently, perf top -p only tracks the pid provided, which isn't very useful for watching forky loads, so give it an inherit option. Signed-off-by: Mike Galbraith Cc: Ingo Molnar Signed-off-by: Peter Zijlstra LKML-Reference: <1248165036.9795.10.camel@marge.simson.net> --- tools/perf/builtin-top.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 95d5c0ae375..c0a423004e1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -58,6 +58,7 @@ static u64 count_filter = 5; static int print_entries = 15; static int target_pid = -1; +static int inherit = 0; static int profile_cpu = -1; static int nr_cpus = 0; static unsigned int realtime_prio = 0; @@ -549,7 +550,7 @@ int group_fd; static void start_counter(int i, int counter) { struct perf_counter_attr *attr; - unsigned int cpu; + int cpu; cpu = profile_cpu; if (target_pid == -1 && profile_cpu == -1) @@ -559,6 +560,7 @@ static void start_counter(int i, int counter) attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; attr->freq = freq; + attr->inherit = (cpu < 0) && inherit; try_again: fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); @@ -685,6 +687,8 @@ static const struct option options[] = { "only display functions with more events than this"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), + OPT_BOOLEAN('i', "inherit", &inherit, + "child tasks inherit counters"), OPT_STRING('s', "sym-filter", &sym_filter, "pattern", "only display symbols matchig this pattern"), OPT_BOOLEAN('z', "zero", &zero, -- cgit v1.2.3-70-g09d2 From 2d1b6949d2c855f195de0f5146625015ecca3944 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 1 Aug 2009 13:15:36 +0200 Subject: perf_counter tools: Fix link errors with older toolchains On older distros (F8 for example) the perf build could fail with such missing symbols: LINK perf /usr/lib/gcc/x86_64-redhat-linux/4.3.2/../../../../lib64/libbfd.a(bfd.o): In function `bfd_demangle': (.text+0x2b3): undefined reference to `cplus_demangle' 
/usr/lib/gcc/x86_64-redhat-linux/4.3.2/../../../../lib64/libbfd.a(bfd.o): In function `bfd_demangle': Link in -liberty too. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a5e9b876ca0..4b20fa47c3a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -345,7 +345,7 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) -EXTLIBS = -lbfd +EXTLIBS = -lbfd -liberty # # Platform specific tweaks -- cgit v1.2.3-70-g09d2 From 27d028de64bd7e1f8e72bdeae6b0586939574fcb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2009 16:52:41 +0200 Subject: perf report: Update for the new FORK/EXIT events Since FORK is now also issued for threads, detect those by comparing the parent and child PID. Teach it about EXIT events and ignore them. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b20a4b6e31b..95fd06cdaa9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -99,6 +99,7 @@ struct comm_event { struct fork_event { struct perf_event_header header; u32 pid, ppid; + u32 tid, ptid; }; struct lost_event { @@ -1608,15 +1609,27 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) +process_task_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->fork.pid); struct thread *parent = threads__findnew(event->fork.ppid); - dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); + event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT", + event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); + + /* + * A thread clone will have the same PID for both + * parent and child. + */ + if (thread == parent) + return 0; + + if (event->header.type == PERF_EVENT_EXIT) + return 0; if (!thread || !parent || thread__fork(thread, parent)) { dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); @@ -1706,7 +1719,8 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return process_comm_event(event, offset, head); case PERF_EVENT_FORK: - return process_fork_event(event, offset, head); + case PERF_EVENT_EXIT: + return process_task_event(event, offset, head); case PERF_EVENT_LOST: return process_lost_event(event, offset, head); -- cgit v1.2.3-70-g09d2 From 9b30a26bf3d2c56dcb1c3afaca28b73fcd6ed405 Mon Sep 17 00:00:00 2001 From: Stoyan Gaydarov Date: Thu, 30 Jul 2009 05:25:29 -0500 Subject: perf tools: Fix faulty check This patch fixes a spelling error that has resulted from copy and pasting. The location of the error was found using a semantic patch but the semantic patch was not trying to find these errors. After looking things over it seemed logical that this change was needed. Please review it and then include the patch if it is in fact the correct change. 
Signed-off-by: Stoyan Gaydarov Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1248949529-20891-1-git-send-email-sgayda2@uiuc.edu> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 28106059bf1..b4fe0579bd6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -565,7 +565,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, goto out_elf_end; secstrs = elf_getdata(sec_strndx, NULL); - if (symstrs == NULL) + if (secstrs == NULL) goto out_elf_end; nr_syms = shdr.sh_size / shdr.sh_entsize; -- cgit v1.2.3-70-g09d2 From 59b9005692d4c8b5d73cfc41aa7229f47be163a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 26 Jul 2009 19:06:19 -0300 Subject: perf top: Add mwait_idle_with_hints to skip_symbols[] We skip the display of idle routine related symbols because they are typically rather erratic and confusing: they depend on the IRQ rate or sometimes they dominate the profile if they are polling based. Add mwait_idle_with_hints too, this is one of the idle routines on x86. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c0a423004e1..f139f1ab933 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -285,6 +285,7 @@ static const char *skip_symbols[] = { "enter_idle", "exit_idle", "mwait_idle", + "mwait_idle_with_hints", "ppc64_runlatch_off", "pseries_dedicated_idle_sleep", NULL -- cgit v1.2.3-70-g09d2 From 7e030655dda5b5efc4305e2a8f46c4967d32eb3d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 2 Aug 2009 13:43:11 +0200 Subject: perf: Fix read buffer overflow Check whether index is within bounds before testing the element. Signed-off-by: Roel Kluin Cc: a.p.zijlstra@chello.nl Cc: Andrew Morton LKML-Reference: <4A757BCF.40101@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- tools/perf/util/quote.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95fd06cdaa9..ce4f28645e6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -253,7 +253,7 @@ static int strcommon(const char *pathname) { int n = 0; - while (pathname[n] == cwd[n] && n < cwdlen) + while (n < cwdlen && pathname[n] == cwd[n]) ++n; return n; diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index c6e5dc0dc82..2726fe40eb5 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -318,7 +318,7 @@ char *quote_path_relative(const char *in, int len, strbuf_addch(out, '"'); if (prefix) { int off = 0; - while (prefix[off] && off < len && prefix[off] == in[off]) + while (off < len && prefix[off] && prefix[off] == in[off]) if (prefix[off] == '/') { prefix += off + 1; in += off + 1; -- cgit v1.2.3-70-g09d2 From 114cfab222233f50f46d7162cf7d99fdc6c271e5 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 5 Aug 2009 13:25:21 +0300 Subject: perf report: Make --sort comm,dso,symbol the default If you're doing performance testing, you're interested in the symbols anyway so lets make "--sort comm,dso,symbol" the default sort option. 
Signed-off-by: Pekka Enberg Acked-by: Peter Zijlstra Cc: acme@redhat.com LKML-Reference: <1249467921-10450-1-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ce4f28645e6..8cb58d68a00 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -31,7 +31,7 @@ static char const *input_name = "perf.data"; static char *vmlinux = NULL; -static char default_sort_order[] = "comm,dso"; +static char default_sort_order[] = "comm,dso,symbol"; static char *sort_order = default_sort_order; static char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; @@ -1424,7 +1424,7 @@ print_entries: if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) { fprintf(fp, "#\n"); - fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); + fprintf(fp, "# (For a higher level overview, try: perf report --sort comm,dso)\n"); fprintf(fp, "#\n"); } fprintf(fp, "\n"); -- cgit v1.2.3-70-g09d2 From 2cdbc46d7b2cb0acb68c3ecad93b000552121fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Aug 2009 14:05:16 +0200 Subject: perf: Auto-detect libbfd Since the C++ demangling isn't needed for everybody and bfd/iberty aren't widely/easily available on all machines, make it optional. It also allows you to forcefully disable demangling by using NO_DEMANGLE=1 and otherwise tries to detect libbfd/libiberty combinations that result in a compiling demangler. Reported-by: Jens Axboe Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Kyle McMartin LKML-Reference: <20090801082048.GX12579@kernel.dk> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 18 +++++++++++++++++- tools/perf/util/symbol.c | 9 +++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4b20fa47c3a..ff905aceb4f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -345,7 +345,6 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) -EXTLIBS = -lbfd -liberty # # Platform specific tweaks @@ -374,6 +373,23 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif +ifdef NO_DEMANGLE + BASIC_CFLAGS += -DNO_DEMANGLE +else + + has_bfd := $(shell sh -c "(echo '\#include '; echo '\#include '; echo '\#ifndef DMGL_PARAMS'; echo '\#define DMGL_PARAMS (1 << 0)'; echo '\#define DMGL_ANSI (1 << 1)'; echo '\#endif'; echo 'int main(int argc, char **argv) { bfd_demangle(NULL, argv[0], DMGL_PARAMS | DMGL_ANSI); return 0; }') | gcc -x c - -lbfd > /dev/null 2>&1 && echo y") + + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo '\#include '; echo '\#ifndef DMGL_PARAMS'; echo '\#define DMGL_PARAMS (1 << 0)'; echo '\#define DMGL_ANSI (1 << 1)'; echo '\#endif'; echo 'int main(int argc, char **argv) { bfd_demangle(NULL, argv[0], DMGL_PARAMS | DMGL_ANSI); return 0; }') | gcc -x c - -lbfd -liberty > /dev/null 2>&1 && echo y") + + ifeq ($(has_bfd),y) + EXTLIBS += -lbfd + else ifeq ($(has_bfd_iberty),y) + EXTLIBS += -lbfd -liberty + else + BASIC_CFLAGS += -DNO_DEMANGLE + endif +endif + ifndef CC_LD_DYNPATH ifdef NO_R_TO_GCC_LINKER # Some gcc does not accept and pass -R to the linker to specify diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b4fe0579bd6..0580b94785e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -6,7 +6,16 @@ #include 
#include #include + +#ifndef NO_DEMANGLE #include +#else +static inline +char *bfd_demangle(void __used *v, const char __used *c, int __used i) +{ + return NULL; +} +#endif const char *sym_hist_filter; -- cgit v1.2.3-70-g09d2 From 386c0b702b1ea81c0f54f5c9832a3d4a52270f14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Aug 2009 10:04:53 -0300 Subject: perf report: Add missing command line options to man page Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20090805130453.GC10688@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 60 ++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1dbc1eeb4c0..6be696b0a2b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -29,13 +29,67 @@ OPTIONS Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. -a:: - system-wide collection + System-wide collection. -l:: - scale counter values + Scale counter values. + +-p:: +--pid=:: + Record events on existing pid. + +-r:: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. +-A:: +--append:: + Append to the output file to do incremental profiling. + +-f:: +--force:: + Overwrite existing data file. + +-c:: +--count=:: + Event period to sample. + +-o:: +--output=:: + Output file name. + +-i:: +--inherit:: + Child tasks inherit counters. +-F:: +--freq=:: + Profile at this frequency. + +-m:: +--mmap-pages=:: + Number of mmap data pages. + +-g:: +--call-graph:: + Do call-graph (stack chain/backtrace) recording. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-s:: +--stat:: + Per thread counts. + +-d:: +--data:: + Sample addresses. + +-n:: +--no-samples:: + Don't sample. SEE ALSO -------- -- cgit v1.2.3-70-g09d2 From 4d1e00a8af426500edfb8643fa6c375b89f1f804 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Aug 2009 19:02:49 -0300 Subject: perf symbol: Fix symbol parsing in certain cases: use the build-id as a symlink In some cases distros have binaries and debuginfo in weird places: [root@doppio tuna]# ls -la /usr/lib64/{xulrunner-1.9.1/xulrunner-stub,firefox-3.5.2/firefox} -rwxr-xr-x 1 root root 90024 2009-08-03 19:45 /usr/lib64/firefox-3.5.2/firefox -rwxr-xr-x 1 root root 90024 2009-08-03 18:23 /usr/lib64/xulrunner-1.9.1/xulrunner-stub [root@doppio tuna]# sha1sum /usr/lib64/{xulrunner-1.9.1/xulrunner-stub,firefox-3.5.2/firefox} 19a858077d263d5de22c9c5da250d3e4396ae739 /usr/lib64/xulrunner-1.9.1/xulrunner-stub 19a858077d263d5de22c9c5da250d3e4396ae739 /usr/lib64/firefox-3.5.2/firefox [root@doppio tuna]# rpm -qf /usr/lib64/{xulrunner-1.9.1/xulrunner-stub,firefox-3.5.2/firefox} xulrunner-1.9.1.2-1.fc11.x86_64 firefox-3.5.2-2.fc11.x86_64 [root@doppio tuna]# ls -la /usr/lib/debug/{usr/lib64/xulrunner-1.9.1/xulrunner-stub,usr/lib64/firefox-3.5.2/firefox}.debug ls: cannot access /usr/lib/debug/usr/lib64/firefox-3.5.2/firefox.debug: No such file or directory -rwxr-xr-x 1 root root 403608 2009-08-03 18:22 /usr/lib/debug/usr/lib64/xulrunner-1.9.1/xulrunner-stub.debug Seemingly we don't have a .symtab when we actually can find it if we use the .note.gnu.build-id ELF section put in place by some distros. 
Use it and find the symbols we need. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0580b94785e..16ddca20294 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -661,10 +661,69 @@ out_close: return err; } +#define BUILD_ID_SIZE 128 + +static char *dso__read_build_id(struct dso *self, int verbose) +{ + int i; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *build_id_data; + Elf_Scn *sec; + char *build_id = NULL, *bid; + unsigned char *raw; + Elf *elf; + int fd = open(self->name, O_RDONLY); + + if (fd < 0) + goto out; + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + if (verbose) + fprintf(stderr, "%s: cannot read %s ELF file.\n", + __func__, self->name); + goto out_close; + } + + if (gelf_getehdr(elf, &ehdr) == NULL) { + if (verbose) + fprintf(stderr, "%s: cannot get elf header.\n", __func__); + goto out_elf_end; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); + if (sec == NULL) + goto out_elf_end; + + build_id_data = elf_getdata(sec, NULL); + if (build_id_data == NULL) + goto out_elf_end; + build_id = malloc(BUILD_ID_SIZE); + if (build_id == NULL) + goto out_elf_end; + raw = build_id_data->d_buf + 16; + bid = build_id; + + for (i = 0; i < 20; ++i) { + sprintf(bid, "%02x", *raw); + ++raw; + bid += 2; + } + if (verbose) + printf("%s(%s): %s\n", __func__, self->name, build_id); +out_elf_end: + elf_end(elf); +out_close: + close(fd); +out: + return build_id; +} + int dso__load(struct dso *self, symbol_filter_t filter, int verbose) { - int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug"); - char *name = malloc(size); + int size = PATH_MAX; + char *name = malloc(size), *build_id = NULL; int variant = 0; int ret = -1; int fd; @@ -686,7 +745,18 @@ more: case 1: /* Ubuntu */ snprintf(name, size, "/usr/lib/debug%s", self->name); break; - case 2: /* Sane people */ + case 2: + build_id = dso__read_build_id(self, verbose); + if (build_id != NULL) { + snprintf(name, size, + "/usr/lib/debug/.build-id/%.2s/%s.debug", + build_id, build_id + 2); + free(build_id); + break; + } + variant++; + /* Fall thru */ + case 3: /* Sane people */ snprintf(name, size, "%s", self->name); break; -- cgit v1.2.3-70-g09d2 From 9424edc2da097c8589fcc24a72552d33e54be161 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Aug 2009 14:05:16 +0200 Subject: perf: Auto-detect libelf Adds autodetection for libelf as well, and simplifies the libbfd code. Furthermore, fail make with an error when libelf is not found and warn about the lack of libbfd. Also provide an option to build a 32bit version even though you might be running a 64bit kernel. 
Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ff905aceb4f..1916e44b9bb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -158,8 +158,10 @@ uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') # If we're on a 64-bit kernel, use -m64 -ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) - M64 := -m64 +ifndef NO_64BIT + ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + M64 := -m64 + endif endif # CFLAGS and LDFLAGS are for the users to override from the command line. @@ -373,19 +375,24 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo '\#include '; echo '\#ifndef DMGL_PARAMS'; echo '\#define DMGL_PARAMS (1 << 0)'; echo '\#define DMGL_ANSI (1 << 1)'; echo '\#endif'; echo 'int main(int argc, char **argv) { bfd_demangle(NULL, argv[0], DMGL_PARAMS | DMGL_ANSI); return 0; }') | gcc -x c - -lbfd > /dev/null 2>&1 && echo y") + has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo '\#include '; echo '\#ifndef DMGL_PARAMS'; echo '\#define DMGL_PARAMS (1 << 0)'; echo '\#define DMGL_ANSI (1 << 1)'; echo '\#endif'; echo 'int main(int argc, char **argv) { bfd_demangle(NULL, argv[0], DMGL_PARAMS | DMGL_ANSI); return 0; }') | gcc -x c - -lbfd -liberty > /dev/null 2>&1 && echo y") + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") ifeq ($(has_bfd),y) EXTLIBS += -lbfd else ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty else + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) BASIC_CFLAGS += -DNO_DEMANGLE endif endif -- cgit v1.2.3-70-g09d2 From f413cdb80ce00ec1a4d0ab949b5d96c81cae7f75 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 01:25:54 +0200 Subject: perf_counter: Fix/complete ftrace event records sampling This patch implements the kernel side support for ftrace event record sampling. A new counter sampling attribute is added: PERF_SAMPLE_TP_RECORD which requests ftrace events record sampling. In this case if a PERF_TYPE_TRACEPOINT counter is active and a tracepoint fires, we emit the tracepoint binary record to the perfcounter event buffer, as a sample. Result, after setting PERF_SAMPLE_TP_RECORD attribute from perf record: perf record -f -F 1 -a -e workqueue:workqueue_execution perf report -D 0x21e18 [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 09 00 00 00 01 00 48 00 d0 c7 00 81 ff ff ff ff ......H........ . 0010: 0a 00 00 00 0a 00 00 00 21 00 00 00 00 00 00 00 ........!...... . 
0020: 2b 00 01 02 0a 00 00 00 0a 00 00 00 65 76 65 6e +...........eve . 0030: 74 73 2f 31 00 00 00 00 00 00 00 00 0a 00 00 00 ts/1........... . 0040: e0 b1 31 81 ff ff ff ff ....... . 0x21e18 [0x48]: PERF_EVENT_SAMPLE (IP, 1): 10: 0xffffffff8100c7d0 period: 33 The raw ftrace binary record starts at offset 0020. Translation: struct trace_entry { type = 0x2b = 43; flags = 1; preempt_count = 2; pid = 0xa = 10; tgid = 0xa = 10; } thread_comm = "events/1" thread_pid = 0xa = 10; func = 0xffffffff8131b1e0 = flush_to_ldisc() What will come next? - Userspace support ('perf trace'), 'flight data recorder' mode for perf trace, etc. - The unconditional copy from the profiling callback brings some costs however if someone wants no such sampling to occur, and needs to be fixed in the future. For that we need to have an instant access to the perf counter attribute. This is a matter of a flag to add in the struct ftrace_event. - Take care of the events recursivity! Don't ever try to record a lock event for example, it seems some locking is used in the profiling fast path and lead to a tracing recursivity. That will be fixed using raw spinlock or recursivity protection. - [...] - Profit! :-) Signed-off-by: Frederic Weisbecker Cc: Li Zefan Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Steven Rostedt Cc: Paul Mackerras Cc: Pekka Enberg Cc: Gabriel Munteanu Cc: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 +- include/linux/perf_counter.h | 9 ++- include/trace/ftrace.h | 130 ++++++++++++++++++++++++++++++++----------- kernel/perf_counter.c | 18 +++++- kernel/trace/trace.c | 1 + kernel/trace/trace.h | 4 -- tools/perf/builtin-record.c | 1 + 7 files changed, 126 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d7cd193c227..a81170de7f6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -89,7 +89,9 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; - +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); struct ring_buffer_event * trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e604e6ef72d..a67dd5c5b6d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -121,8 +121,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_TP_RECORD = 1U << 10, - PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; /* @@ -413,6 +414,11 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; +struct perf_tracepoint_record { + int size; + char *record; +}; + struct task_struct; /** @@ -681,6 +687,7 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; + void *private; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index fec71f8dbc4..7fb16d90e7b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -353,15 +353,7 @@ static inline int ftrace_get_offsets_##call( \ /* * Generate the functions needed for tracepoint perf_counter support. 
* - * static void ftrace_profile_(proto) - * { - * extern void perf_tpcounter_event(int, u64, u64); - * u64 __addr = 0, __count = 1; - * - * <-- here we expand the TP_perf_assign() macro - * - * perf_tpcounter_event(event_.id, __addr, __count); - * } + * NOTE: The insertion profile callback (ftrace_profile_) is defined later * * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) * { @@ -381,28 +373,10 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TP_fast_assign -#define TP_fast_assign(args...) - -#undef TP_perf_assign -#define TP_perf_assign(args...) args - -#undef __perf_addr -#define __perf_addr(a) __addr = (a) - -#undef __perf_count -#define __perf_count(c) __count = (c) - #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int, u64, u64); \ - u64 __addr = 0, __count = 1; \ - { assign; } \ - perf_tpcounter_event(event_##call.id, __addr, __count); \ -} \ +static void ftrace_profile_##call(proto); \ \ static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ { \ @@ -422,12 +396,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - #endif /* @@ -647,5 +615,99 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * Define the insertion callback to profile events + * + * The job is very similar to ftrace_raw_event_ except that we don't + * insert in the ring buffer but in a perf counter. + * + * static void ftrace_profile_(proto) + * { + * struct ftrace_data_offsets_ __maybe_unused __data_offsets; + * struct ftrace_event_call *event_call = &event_; + * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * struct ftrace_raw_##call *entry; + * u64 __addr = 0, __count = 1; + * unsigned long irq_flags; + * int __entry_size; + * int __data_size; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * __data_size = ftrace_get_offsets_(&__data_offsets, args); + * __entry_size = __data_size + sizeof(*entry); + * + * do { + * char raw_data[__entry_size]; <- allocate our sample in the stack + * struct trace_entry *ent; + * + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; + * + * <- do some jobs with dynamic arrays + * + * <- affect our values + * + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter + * } while (0); + * + * } + */ + +#ifdef CONFIG_EVENT_PROFILE + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + struct ftrace_raw_##call *entry; \ + u64 __addr = 0, __count = 1; \ + unsigned long irq_flags; \ + int __entry_size; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = 
ftrace_get_offsets_##call(&__data_offsets, args); \ + __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ + \ + do { \ + char raw_data[__entry_size]; \ + struct trace_entry *ent; \ + \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + __entry_size); \ + } while (0); \ + \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_EVENT_PROFILE */ + #undef _TRACE_PROFILE_INIT diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 52eb4b68d34..868102172aa 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; + struct perf_tracepoint_record *tp; int callchain_size = 0; u64 time; struct { @@ -2714,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } + if (sample_type & PERF_SAMPLE_TP_RECORD) { + tp = data->private; + header.size += tp->size; + } + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -2777,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } + if (sample_type & PERF_SAMPLE_TP_RECORD) + perf_output_copy(&handle, tp->record, tp->size); + perf_output_end(&handle); } @@ -3703,11 +3712,18 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count) +void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) { + struct perf_tracepoint_record tp = { + .size = entry_size, + .record = record, + }; + struct perf_sample_data data = { .regs = get_irq_regs(), .addr = addr, + .private = &tp, }; if (!data.regs) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8930e39b9d8..c22b40f8f57 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? 
TRACE_FLAG_NEED_RESCHED : 0); } +EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, int type, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3548ae5cc78..8b9f4f6e955 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); - void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6da09928130..90c98082af1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -412,6 +412,7 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; -- cgit v1.2.3-70-g09d2 From 923c42c19944da214d697e312a040384a0e33e78 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 22 Jul 2009 20:36:03 +0200 Subject: perf_counter tools: Fix/resurrect perf top annotation in a simple interactive form perf top used to have annotation support, but it has bitrotted and removed. This patch restores that: it allows the user to select any symbol in kernel space for source level annotation on the fly, switch between event counters and alter display variables. When symbol details are being displayed, stopping annotation reverts to normal. known keys: [d] select display delay. [e] select display entries (lines). [E] select annotation event counter. [f] select normal display count filter. [F] select annotation display count filter (percentage). [qQ] quit. [s] select annotation symbol and start annotation. [S] stop annotation, revert to normal display. [z] toggle event count zeroing. 
Sample: ------------------------------------------------------------------------------ PerfTop: 16719 irqs/sec kernel:78.7% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ Showing cache-misses for e1000_clean_rx_irq Events Pcnt (>=3%) 0 0.0% /* adjust length to remove Ethernet CRC */ 0 0.0% if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) 0 0.0% length -= 4; 436 5.0% f039: 41 f6 84 24 5c 29 00 testb $0x1,0x295c(%r12) 0 0.0% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx 0 0.0% f08c: 48 83 ef 02 sub $0x2,%rdi 0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi 811 9.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi) 0 0.0% 0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) { 0 0.0% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15) 7226 82.6% f119: 0f 85 24 fe ff ff jne ef43 Available events: 0 cache-misses 1 cache-references 2 instructions 3 cycles Enter details event counter: 2 ------------------------------------------------------------------------------ PerfTop: 15035 irqs/sec kernel:79.0% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ Showing instructions for e1000_clean_rx_irq Events Pcnt (>=3%) 0 0.0% int *work_done, int work_to_do) 0 0.0% { 175 0.9% eebf: 55 push %rbp 1898 9.8% eec0: 48 89 e5 mov %rsp,%rbp 0 0.0% 0 0.0% i = rx_ring->next_to_clean; 140 0.7% ef0a: 0f b7 41 1a movzwl 0x1a(%rcx),%eax 670 3.4% ef0e: 89 45 ac mov %eax,-0x54(%rbp) 0 0.0% { 0 0.0% memcpy(skb->data + offset, from, len); 91 0.5% f07b: 49 8b b6 e8 00 00 00 mov 0xe8(%r14),%rsi 1153 5.9% f082: 48 8b b8 e8 00 00 00 mov 0xe8(%rax),%rdi 42 0.2% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx 14 0.1% f08c: 48 83 ef 02 sub $0x2,%rdi 0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi 1618 8.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi) 0 0.0% 0 0.0% /* return some buffers to hardware, one at a time is too slow */ 0 0.0% if (cleaned_count >= E1000_RX_BUFFER_WRITE) { 867 4.5% f0e7: 83 7d b0 0f cmpl $0xf,-0x50(%rbp) 0 0.0% 0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) { 37 0.2% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15) 4047 20.8% f119: 0f 85 24 fe ff ff jne ef43 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 496 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 456 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f139f1ab933..d58701346b1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -31,6 +31,8 @@ #include #include +#include +#include #include #include @@ -54,7 +56,7 @@ static int system_wide = 0; static int default_interval = 100000; -static u64 count_filter = 5; +static int count_filter = 5; static int print_entries = 15; static int target_pid = -1; @@ -69,14 +71,26 @@ static int freq = 0; static int verbose = 0; static char *vmlinux = NULL; -static char *sym_filter; -static unsigned long filter_start; -static unsigned long filter_end; - static int delay_secs = 2; static int zero; static int dump_symtab; +/* + * Source + */ + +struct source_line { + u64 eip; + unsigned long count[MAX_COUNTERS]; + char *line; + struct source_line *next; +}; + +static char *sym_filter = NULL; +struct sym_entry *sym_filter_entry = NULL; +static int sym_pcnt_filter = 5; +static int sym_counter = 0; + /* * Symbols */ @@ -91,9 +105,237 @@ struct sym_entry { unsigned long snap_count; double weight; int 
skip; + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t source_lock; }; -struct sym_entry *sym_filter_entry; +/* + * Source functions + */ + +static void parse_source(struct sym_entry *syme) +{ + struct symbol *sym; + struct module *module; + struct section *section = NULL; + FILE *file; + char command[PATH_MAX*2], *path = vmlinux; + u64 start, end, len; + + if (!syme) + return; + + if (syme->lines) { + pthread_mutex_lock(&syme->source_lock); + goto out_assign; + } + + sym = (struct symbol *)(syme + 1); + module = sym->module; + + if (module) + path = module->path; + if (!path) + return; + + start = sym->obj_start; + if (!start) + start = sym->start; + + if (module) { + section = module->sections->find_section(module->sections, ".text"); + if (section) + start -= section->vma; + } + + end = start + sym->end - sym->start + 1; + len = sym->end - sym->start; + + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); + + file = popen(command, "r"); + if (!file) + return; + + pthread_mutex_lock(&syme->source_lock); + syme->lines_tail = &syme->lines; + while (!feof(file)) { + struct source_line *src; + size_t dummy = 0; + char *c; + + src = malloc(sizeof(struct source_line)); + assert(src != NULL); + memset(src, 0, sizeof(struct source_line)); + + if (getline(&src->line, &dummy, file) < 0) + break; + if (!src->line) + break; + + c = strchr(src->line, '\n'); + if (c) + *c = 0; + + src->next = NULL; + *syme->lines_tail = src; + syme->lines_tail = &src->next; + + if (strlen(src->line)>8 && src->line[8] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + if (strlen(src->line)>8 && src->line[16] == ':') { + src->eip = strtoull(src->line, NULL, 16); + if (section) + src->eip += section->vma; + } + } + pclose(file); +out_assign: + sym_filter_entry = syme; + pthread_mutex_unlock(&syme->source_lock); +} + +static void __zero_source_counters(struct sym_entry *syme) +{ + int i; + struct source_line *line; + + line = syme->lines; + while (line) { + for (i = 0; i < nr_counters; i++) + line->count[i] = 0; + line = line->next; + } +} + +static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) +{ + struct source_line *line; + + if (syme != sym_filter_entry) + return; + + if (pthread_mutex_trylock(&syme->source_lock)) + return; + + if (!syme->source) + goto out_unlock; + + for (line = syme->lines; line; line = line->next) { + if (line->eip == ip) { + line->count[counter]++; + break; + } + if (line->eip > ip) + break; + } +out_unlock: + pthread_mutex_unlock(&syme->source_lock); +} + +static void lookup_sym_source(struct sym_entry *syme) +{ + struct symbol *symbol = (struct symbol *)(syme + 1); + struct source_line *line; + char pattern[PATH_MAX]; + char *idx; + + sprintf(pattern, "<%s>:", symbol->name); + + if (symbol->module) { + idx = strstr(pattern, "\t"); + if (idx) + *idx = 0; + } + + pthread_mutex_lock(&syme->source_lock); + for (line = syme->lines; line; line = line->next) { + if (strstr(line->line, pattern)) { + syme->source = line; + break; + } + } + pthread_mutex_unlock(&syme->source_lock); +} + +static void show_lines(struct source_line *queue, int count, int total) +{ + int i; + struct source_line *line; + + line = queue; + for (i = 0; i < count; i++) { + float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; + + printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); + line = line->next; + } +} + 
+#define TRACE_COUNT 3 + +static void show_details(struct sym_entry *syme) +{ + struct symbol *symbol; + struct source_line *line; + struct source_line *line_queue = NULL; + int displayed = 0; + int line_queue_count = 0, total = 0, more = 0; + + if (!syme) + return; + + if (!syme->source) + lookup_sym_source(syme); + + if (!syme->source) + return; + + symbol = (struct symbol *)(syme + 1); + printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); + printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); + + pthread_mutex_lock(&syme->source_lock); + line = syme->source; + while (line) { + total += line->count[sym_counter]; + line = line->next; + } + + line = syme->source; + while (line) { + float pcnt = 0.0; + + if (!line_queue_count) + line_queue = line; + line_queue_count++; + + if (line->count[sym_counter]) + pcnt = 100.0 * line->count[sym_counter] / (float)total; + if (pcnt >= (float)sym_pcnt_filter) { + if (displayed <= print_entries) + show_lines(line_queue, line_queue_count, total); + else more++; + displayed += line_queue_count; + line_queue_count = 0; + line_queue = NULL; + } else if (line_queue_count > TRACE_COUNT) { + line_queue = line_queue->next; + line_queue_count--; + } + + line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; + line = line->next; + } + pthread_mutex_unlock(&syme->source_lock); + if (more) + printf("%d lines not displayed, maybe increase display entries [e]\n", more); +} struct dso *kernel_dso; @@ -228,6 +470,11 @@ static void print_sym_table(void) printf("------------------------------------------------------------------------------\n\n"); + if (sym_filter_entry) { + show_details(sym_filter_entry); + return; + } + if (nr_counters == 1) printf(" samples pcnt"); else @@ -242,7 +489,7 @@ static void print_sym_table(void) struct symbol *sym = (struct symbol *)(syme + 1); double pcnt; - if (++printed > print_entries || syme->snap_count < count_filter) + if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / @@ -261,19 +508,208 @@ static void print_sym_table(void) } } +static void prompt_integer(int *target, const char *msg) +{ + char *buf = malloc(0), *p; + size_t dummy = 0; + int tmp; + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + return; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + p = buf; + while(*p) { + if (!isdigit(*p)) + goto out_free; + p++; + } + tmp = strtoul(buf, NULL, 10); + *target = tmp; +out_free: + free(buf); +} + +static void prompt_percent(int *target, const char *msg) +{ + int tmp = 0; + + prompt_integer(&tmp, msg); + if (tmp >= 0 && tmp <= 100) + *target = tmp; +} + +static void prompt_symbol(struct sym_entry **target, const char *msg) +{ + char *buf = malloc(0), *p; + struct sym_entry *syme = *target, *n, *found = NULL; + size_t dummy = 0; + + /* zero counters of active symbol */ + if (syme) { + pthread_mutex_lock(&syme->source_lock); + __zero_source_counters(syme); + *target = NULL; + pthread_mutex_unlock(&syme->source_lock); + } + + fprintf(stdout, "\n%s: ", msg); + if (getline(&buf, &dummy, stdin) < 0) + goto out_free; + + p = strchr(buf, '\n'); + if (p) + *p = 0; + + pthread_mutex_lock(&active_symbols_lock); + syme = list_entry(active_symbols.next, struct sym_entry, node); + pthread_mutex_unlock(&active_symbols_lock); + + list_for_each_entry_safe_from(syme, n, &active_symbols, node) { + struct symbol *sym = (struct symbol *)(syme + 1); + + if (!strcmp(buf, sym->name)) { + found = syme; + 
break; + } + } + + if (!found) { + fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); + sleep(1); + return; + } else + parse_source(found); + +out_free: + free(buf); +} + +static void print_known_keys(void) +{ + fprintf(stdout, "\nknown keys:\n"); + fprintf(stdout, "\t[d] select display delay.\n"); + fprintf(stdout, "\t[e] select display entries (lines).\n"); + fprintf(stdout, "\t[E] select annotation event counter.\n"); + fprintf(stdout, "\t[f] select normal display count filter.\n"); + fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); + fprintf(stdout, "\t[qQ] quit.\n"); + fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); + fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); + fprintf(stdout, "\t[z] toggle event count zeroing.\n"); +} + +static void handle_keypress(int c) +{ + int once = 0; +repeat: + switch (c) { + case 'd': + prompt_integer(&delay_secs, "Enter display delay"); + break; + case 'e': + prompt_integer(&print_entries, "Enter display entries (lines)"); + break; + case 'E': + if (nr_counters > 1) { + int i; + + fprintf(stderr, "\nAvailable events:"); + for (i = 0; i < nr_counters; i++) + fprintf(stderr, "\n\t%d %s", i, event_name(i)); + + prompt_integer(&sym_counter, "Enter details event counter"); + + if (sym_counter >= nr_counters) { + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); + sym_counter = 0; + sleep(1); + } + } else sym_counter = 0; + break; + case 'f': + prompt_integer(&count_filter, "Enter display event count filter"); + break; + case 'F': + prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); + break; + case 'q': + case 'Q': + printf("exiting.\n"); + exit(0); + case 's': + prompt_symbol(&sym_filter_entry, "Enter details symbol"); + break; + case 'S': + if (!sym_filter_entry) + break; + else { + struct sym_entry *syme = sym_filter_entry; + + pthread_mutex_lock(&syme->source_lock); + sym_filter_entry = NULL; + __zero_source_counters(syme); + pthread_mutex_unlock(&syme->source_lock); + } + break; + case 'z': + zero = ~zero; + break; + default: { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + struct termios tc, save; + + if (!once) { + print_known_keys(); + once++; + } + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); + + poll(&stdin_poll, 1, -1); + c = getc(stdin); + + tcsetattr(0, TCSAFLUSH, &save); + goto repeat; + } + } +} + static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - int delay_msecs = delay_secs * 1000; - - printf("PerfTop refresh period: %d seconds\n", delay_secs); + struct termios tc, save; + int delay_msecs, c; + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; +repeat: + delay_msecs = delay_secs * 1000; + tcsetattr(0, TCSANOW, &tc); + /* trash return*/ + getc(stdin); do { print_sym_table(); } while (!poll(&stdin_poll, 1, delay_msecs) == 1); - printf("key pressed - exiting.\n"); - exit(0); + c = getc(stdin); + tcsetattr(0, TCSAFLUSH, &save); + + handle_keypress(c); + goto repeat; return NULL; } @@ -293,7 +729,6 @@ static const char *skip_symbols[] = { static int symbol_filter(struct dso *self, struct symbol *sym) { - static int filter_match; struct sym_entry *syme; const char *name = sym->name; int i; @@ -315,6 +750,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; 
syme = dso__sym_priv(self, sym); + pthread_mutex_init(&syme->source_lock, NULL); + if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) + sym_filter_entry = syme; + for (i = 0; skip_symbols[i]; i++) { if (!strcmp(skip_symbols[i], name)) { syme->skip = 1; @@ -322,29 +761,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym) } } - if (filter_match == 1) { - filter_end = sym->start; - filter_match = -1; - if (filter_end - filter_start > 10000) { - fprintf(stderr, - "hm, too large filter symbol <%s> - skipping.\n", - sym_filter); - fprintf(stderr, "symbol filter start: %016lx\n", - filter_start); - fprintf(stderr, " end: %016lx\n", - filter_end); - filter_end = filter_start = 0; - sym_filter = NULL; - sleep(1); - } - } - - if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { - filter_match = 1; - filter_start = sym->start; - } - - return 0; } @@ -380,8 +796,6 @@ out_delete_dso: return -1; } -#define TRACE_COUNT 3 - /* * Binary search in the histogram table and record the hit: */ @@ -394,6 +808,7 @@ static void record_ip(u64 ip, int counter) if (!syme->skip) { syme->count[counter]++; + record_precise_ip(syme, counter, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); @@ -690,8 +1105,8 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_BOOLEAN('i', "inherit", &inherit, "child tasks inherit counters"), - OPT_STRING('s', "sym-filter", &sym_filter, "pattern", - "only display symbols matchig this pattern"), + OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", + "symbol to annotate - requires -k option"), OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, @@ -734,6 +1149,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) delay_secs = 1; parse_symbols(); + parse_source(sym_filter_entry); /* * Fill in the ones not specifically initialized via -c: -- cgit v1.2.3-70-g09d2 From 46ab976443c6c566c8fe6fc72a6733a55ba9fbea Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 24 Jul 2009 10:09:50 +0200 Subject: perf_counter tools: Allow perf top top users to switch between weighted and individual counter display Add [w]eighted hotkey. Pressing [w] toggles between displaying weighted total of all counters, and the counter selected via [E]vent select key. 
------------------------------------------------------------------------------ PerfTop: 90395 irqs/sec kernel:16.1% [cache-misses/cache-references/instructions], (all, 4 CPUs) ------------------------------------------------------------------------------ weight samples pcnt RIP kernel function ______ _______ _____ ________________ _______________ 1275408.6 10881 - 5.3% - ffffffff81146f70 : copy_page_c 553683.4 43569 - 21.3% - ffffffff81146f20 : clear_page_c 74075.0 6768 - 3.3% - ffffffff81147190 : copy_user_generic_string 40602.9 7538 - 3.7% - ffffffff81284ba2 : _spin_lock 26882.1 965 - 0.5% - ffffffff8109d280 : file_ra_state_init [w] ------------------------------------------------------------------------------ PerfTop: 91221 irqs/sec kernel:14.5% [10000Hz cache-misses], (all, 4 CPUs) ------------------------------------------------------------------------------ weight samples pcnt RIP kernel function ______ _______ _____ ________________ _______________ 47320.00 - 22.3% - ffffffff81146f20 : clear_page_c 14261.00 - 6.7% - ffffffff810992f5 : __rmqueue 11046.00 - 5.2% - ffffffff81146f70 : copy_page_c 7842.00 - 3.7% - ffffffff81284ba2 : _spin_lock 7234.00 - 3.4% - ffffffff810aa1d6 : unmap_vmas Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d58701346b1..4eef3465e83 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -90,6 +90,7 @@ static char *sym_filter = NULL; struct sym_entry *sym_filter_entry = NULL; static int sym_pcnt_filter = 5; static int sym_counter = 0; +static int display_weighted = -1; /* * Symbols @@ -354,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym) double weight = sym->snap_count; int counter; + if (!display_weighted) + return weight; + for (counter = 1; counter < nr_counters-1; counter++) weight *= sym->count[counter]; @@ -401,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) static void print_sym_table(void) { int printed = 0, j; - int counter; + int counter, snap = !display_weighted ? 
sym_counter : 0; float samples_per_sec = samples/delay_secs; float ksamples_per_sec = (samples-userspace_samples)/delay_secs; float sum_ksamples = 0.0; @@ -417,7 +421,7 @@ static void print_sym_table(void) pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - syme->snap_count = syme->count[0]; + syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); @@ -437,7 +441,7 @@ static void print_sym_table(void) samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); - if (nr_counters == 1) { + if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); if (freq) printf("Hz "); @@ -445,7 +449,9 @@ static void print_sym_table(void) printf(" "); } - for (counter = 0; counter < nr_counters; counter++) { + if (!display_weighted) + printf("%s", event_name(sym_counter)); + else for (counter = 0; counter < nr_counters; counter++) { if (counter) printf("/"); @@ -495,7 +501,7 @@ static void print_sym_table(void) pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - if (nr_counters == 1) + if (nr_counters == 1 || !display_weighted) printf("%20.2f - ", syme->weight); else printf("%9.1f %10ld - ", syme->weight, syme->snap_count); @@ -594,13 +600,14 @@ static void print_known_keys(void) fprintf(stdout, "\nknown keys:\n"); fprintf(stdout, "\t[d] select display delay.\n"); fprintf(stdout, "\t[e] select display entries (lines).\n"); - fprintf(stdout, "\t[E] select annotation event counter.\n"); + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); fprintf(stdout, "\t[f] select normal display count filter.\n"); fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); fprintf(stdout, "\t[qQ] quit.\n"); fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); - fprintf(stdout, "\t[z] toggle event count zeroing.\n"); + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); } static void handle_keypress(int c) @@ -656,6 +663,9 @@ repeat: pthread_mutex_unlock(&syme->source_lock); } break; + case 'w': + display_weighted = ~display_weighted; + break; case 'z': zero = ~zero; break; -- cgit v1.2.3-70-g09d2 From 091bd2e993fcb1094a23e36157285b62bc87afdf Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 4 Aug 2009 10:21:23 +0200 Subject: perf top: Improve interactive key handling Pressing any key which is not currently mapped to functionality, based on startup command line options, displays currently mapped keys, and prompts for input. Pressing any unmapped key at the prompt returns the user to display mode with variables unchanged. eg, pressing ? etc displays currently available keys, the value of the variable associated with that key, and prompts. Pressing same again aborts input. 
Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 108 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4eef3465e83..7de28ce9ca2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -595,25 +595,84 @@ out_free: free(buf); } -static void print_known_keys(void) +static void print_mapped_keys(void) { - fprintf(stdout, "\nknown keys:\n"); - fprintf(stdout, "\t[d] select display delay.\n"); - fprintf(stdout, "\t[e] select display entries (lines).\n"); - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); - fprintf(stdout, "\t[f] select normal display count filter.\n"); - fprintf(stdout, "\t[F] select annotation display count filter (percentage).\n"); - fprintf(stdout, "\t[qQ] quit.\n"); - fprintf(stdout, "\t[s] select annotation symbol and start annotation.\n"); - fprintf(stdout, "\t[S] stop annotation, revert to normal display.\n"); - fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + char *name = NULL; + + if (sym_filter_entry) { + struct symbol *sym = (struct symbol *)(sym_filter_entry+1); + name = sym->name; + } + + fprintf(stdout, "\nMapped keys:\n"); + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); + + if (nr_counters > 1) + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); + + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); + + if (vmlinux) { + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); + fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); + fprintf(stdout, "\t[S] stop annotation.\n"); + } + + if (nr_counters > 1) + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); + fprintf(stdout, "\t[qQ] quit.\n"); +} + +static int key_mapped(int c) +{ + switch (c) { + case 'd': + case 'e': + case 'f': + case 'z': + case 'q': + case 'Q': + return 1; + case 'E': + case 'w': + return nr_counters > 1 ? 1 : 0; + case 'F': + case 's': + case 'S': + return vmlinux ? 
1 : 0; + } + + return 0; } static void handle_keypress(int c) { - int once = 0; -repeat: + if (!key_mapped(c)) { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + struct termios tc, save; + + print_mapped_keys(); + fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); + fflush(stdout); + + tcgetattr(0, &save); + tc = save; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); + + poll(&stdin_poll, 1, -1); + c = getc(stdin); + + tcsetattr(0, TCSAFLUSH, &save); + if (!key_mapped(c)) + return; + } + switch (c) { case 'd': prompt_integer(&delay_secs, "Enter display delay"); @@ -669,28 +728,6 @@ repeat: case 'z': zero = ~zero; break; - default: { - struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - struct termios tc, save; - - if (!once) { - print_known_keys(); - once++; - } - - tcgetattr(0, &save); - tc = save; - tc.c_lflag &= ~(ICANON | ECHO); - tc.c_cc[VMIN] = 0; - tc.c_cc[VTIME] = 0; - tcsetattr(0, TCSANOW, &tc); - - poll(&stdin_poll, 1, -1); - c = getc(stdin); - - tcsetattr(0, TCSAFLUSH, &save); - goto repeat; - } } } @@ -705,6 +742,7 @@ static void *display_thread(void *arg __used) tc.c_lflag &= ~(ICANON | ECHO); tc.c_cc[VMIN] = 0; tc.c_cc[VTIME] = 0; + repeat: delay_msecs = delay_secs * 1000; tcsetattr(0, TCSANOW, &tc); -- cgit v1.2.3-70-g09d2 From 836179834833272f89098c6d1e1b89e8e69797c2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 4 Aug 2009 10:24:41 +0200 Subject: perf top: Update man page perf_counter tools: update perf top manual page to reflect current implementation. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-top.txt | 112 ++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 539d0128972..4a7d558dc30 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -3,36 +3,122 @@ perf-top(1) NAME ---- -perf-top - Run a command and profile it +perf-top - System profiling tool. SYNOPSIS -------- [verse] -'perf top' [-e | --event=EVENT] [-l] [-a] +'perf top' [-e | --event=EVENT] [] DESCRIPTION ----------- -This command runs a command and gathers a performance counter profile -from it. +This command generates and displays a performance counter profile in realtime. OPTIONS ------- -...:: - Any command you can specify in a shell. +-a:: +--all-cpus:: + System-wide collection. (default) + +-c :: +--count=:: + Event period to sample. + +-C :: +--CPU=:: + CPU to profile. + +-d :: +--delay=:: + Number of seconds to delay between refreshes. --e:: ---event=:: +-e :: +--event=:: Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. --a:: - system-wide collection +-E :: +--entries=:: + Display this many functions. + +-f :: +--count-filter=:: + Only display functions with more events than this. + +-F :: +--freq=:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks inherit counters, only makes sens with -p option. + +-k :: +--vmlinux=:: + Path to vmlinux. Required for annotation functionality. + +-m :: +--mmap-pages=:: + Number of mmapped data pages. + +-p :: +--pid=:: + Profile events on existing pid. 
+ +-r :: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. + +-s :: +--sym-annotate=:: + Annotate this symbol. Requires -k option. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[w]:: + Toggle between weighted sum and individual count[E]r profile. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. --l:: - scale counter values SEE ALSO -------- -- cgit v1.2.3-70-g09d2 From 1953287bfe8afcbbd235bd6c42c9df06d52438dc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 07:11:05 +0200 Subject: perf tools: Fix call-chain cumul hit based sub-total (fractal mode) The callchain fractal mode builds each new total hits in a new branch of profiling by using the parent's hits of the current branch plus the hits of the children. This is wrong, the total hits of a branch should be made of the sum of every children hits, we must ignore the parent hits in this scope. This patch also fixes another mistake with the hit counting. Now the rates are correct. Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Pekka Enberg Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- tools/perf/util/callchain.c | 27 ++++++++++++++++----------- tools/perf/util/callchain.h | 7 ++++++- 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8cb58d68a00..da402e18656 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -901,7 +901,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, int i; if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = self->cumul_hit; + new_total = self->children_hit; else new_total = total_samples; @@ -930,7 +930,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - child->cumul_hit); + cumul_hits(child)); } ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9d3c8141b8c..98c5627f327 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -26,10 +26,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct callchain_node *rnode; + u64 chain_cumul = cumul_hits(chain); while (*p) { + u64 rnode_cumul; + parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); + rnode_cumul = cumul_hits(rnode); switch (mode) { case CHAIN_FLAT: @@ -40,7 +44,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - if (rnode->cumul_hit < chain->cumul_hit) + if (rnode_cumul < chain_cumul) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -87,7 +91,7 @@ static void __sort_chain_graph_abs(struct callchain_node 
*node, chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_ABS); } @@ -108,11 +112,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->cumul_hit * min_percent / 100.0; + min_hit = node->children_hit * min_percent / 100.0; chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); - if (child->cumul_hit >= min_hit) + if (cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_REL); } @@ -211,7 +215,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->cumul_hit = new->hit = 1; + new->children_hit = 0; + new->hit = 1; } /* @@ -241,7 +246,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; - new->cumul_hit = parent->cumul_hit; + new->children_hit = parent->children_hit; + parent->children_hit = cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -249,6 +255,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, if (idx_total < chain->nr) { parent->hit = 0; add_child(parent, chain, idx_total, syms); + parent->children_hit++; } else { parent->hit = 1; } @@ -269,13 +276,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - goto cumul; + goto inc_children_hit; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); -cumul: - root->cumul_hit++; +inc_children_hit: + root->children_hit++; } static int @@ -317,8 +324,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; - root->cumul_hit++; - return 0; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7812122bea1..b2d128e07c8 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -21,7 +21,7 @@ struct callchain_node { struct rb_root rb_root; /* sorted tree of children */ unsigned int val_nr; u64 hit; - u64 cumul_hit; /* hit + hits of children */ + u64 children_hit; }; struct callchain_param; @@ -48,6 +48,11 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } +static inline u64 cumul_hits(struct callchain_node *node) +{ + return node->hit + node->children_hit; +} + int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -- cgit v1.2.3-70-g09d2 From 1c222bce7dd0cb9578b4c5cd3874a74f1db497c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 20:57:41 +0200 Subject: perf tools: Fix multi-counter stat bug caused by incorrect reading of perf.data file header Brice Goglin reported that only the first result from a multi-counter perf record --stat run is accurate, the rest looks bogus. A silly mistake made us re-read the first attribute for every recorded attribute. 
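(The underlying mistake is easy to make whenever a fixed-size record points at a variable-length block elsewhere in the file: the offset to resume from has to be noted after the record has been consumed, not before. A minimal sketch of the read/detour/resume pattern follows; the record type is invented, it is not the real perf.data layout.)

#include <sys/types.h>
#include <unistd.h>

/* Illustrative only; "struct record" and its fields are made up. */
struct record {
	off_t  data_offset;   /* where this record's payload lives */
	size_t data_size;
};

static void read_record(int fd, struct record *rec, void *payload)
{
	off_t next;

	read(fd, rec, sizeof(*rec));
	next = lseek(fd, 0, SEEK_CUR);   /* position *after* the record */

	lseek(fd, rec->data_offset, SEEK_SET);
	read(fd, payload, rec->data_size);

	lseek(fd, next, SEEK_SET);       /* resume at the next record   */
}

Taking 'next' before the read() is how the same first attribute ended up being parsed over and over.
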
Reported-by: Brice Goglin Signed-off-by: Peter Zijlstra Tested-by: Brice Goglin Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 450384b3bbe..95a44bcfc2d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -213,9 +213,10 @@ struct perf_header *perf_header__read(int fd) for (i = 0; i < nr_attrs; i++) { struct perf_header_attr *attr; - off_t tmp = lseek(fd, 0, SEEK_CUR); + off_t tmp; do_read(fd, &f_attr, sizeof(f_attr)); + tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); -- cgit v1.2.3-70-g09d2 From 8f18aec535b5ca513dd13b531730177d35175ffa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 19:40:28 +0200 Subject: perf report: Fix per task mult-counter stat reporting Brice Goglin reported: > I can easily sort them by thread id, but I don't know how to match > my 4 events with each group of 4 lines. Also report the counter id and the time running/enabled stats (in case the counter got time-shared). Reported-by: Brice Goglin Signed-off-by: Peter Zijlstra Tested-by: Brice Goglin Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 31 +++++++++++++++++++++++++++---- tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.h | 1 + 3 files changed, 35 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index da402e18656..84205462e07 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -112,7 +112,9 @@ struct read_event { struct perf_event_header header; u32 pid,tid; u64 value; - u64 format[3]; + u64 time_enabled; + u64 time_running; + u64 id; }; typedef union event_union { @@ -1690,14 +1692,37 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static struct perf_header *header; + +static struct perf_counter_attr *perf_header__find_attr(u64 id) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + int j; + + for (j = 0; j < attr->ids; j++) { + if (attr->id[j] == id) + return &attr->attr; + } + } + + return NULL; +} + static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + struct perf_counter_attr *attr = perf_header__find_attr(event->read.id); + + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, event->read.tid, + attr ? 
__event_name(attr->type, attr->config) + : "FAIL", event->read.value); return 0; @@ -1743,8 +1768,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct perf_header *header; - static u64 perf_header__sample_type(void) { u64 sample_type = 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7bdad8df22a..f77407b5832 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -223,9 +223,15 @@ char *event_name(int counter) { u64 config = attrs[counter].config; int type = attrs[counter].type; + + return __event_name(type, config); +} + +char *__event_name(int type, u64 config) +{ static char buf[32]; - if (attrs[counter].type == PERF_TYPE_RAW) { + if (type == PERF_TYPE_RAW) { sprintf(buf, "raw 0x%llx", config); return buf; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1ea5d09b6eb..192a962e3a0 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -10,6 +10,7 @@ extern int nr_counters; extern struct perf_counter_attr attrs[MAX_COUNTERS]; extern char *event_name(int ctr); +extern char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); -- cgit v1.2.3-70-g09d2 From 94cb9e385d5b4d55a5ae389baa10ad2835ea39bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 6 Aug 2009 14:43:17 -0300 Subject: perf report: Add debug help for the finding of symbol bugs - show the symtab origin (DSO, build-id, kernel, etc) Used with perf report --verbose: [acme@doppio linux-2.6-tip]$ perf report -v | head -16 5.17% firefox /usr/lib64/xulrunner-1.9.1/libxul.so 0x00000000005d8eee f [.] imgContainer::DrawFrameTo(gfxIImageFrame*, gfxIImageFrame*, nsRect&) 2.56% firefox /lib64/libpthread-2.10.1.so 0x0000000000008e02 d [.] __pthread_mutex_lock_internal 1.94% firefox /usr/lib64/xulrunner-1.9.1/libxul.so 0x0000000000d0af8f f [.] SearchTable 1.75% firefox [kernel] 0xffffffffff60013b k [.] vread_hpet 1.63% firefox /lib64/libpthread-2.10.1.so 0x000000000000a404 d [.] __pthread_mutex_unlock 1.47% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x00000000000482ea f [.] js_Interpret 1.42% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x000000000003eda3 f [.] JS_CallTracer 1.24% firefox [kernel] 0xffffffff8102ca4a k [k] read_hpet 1.16% firefox [kernel] 0xffffffff810f3dd4 k [k] fget_light 1.11% firefox /usr/lib64/xulrunner-1.9.1/libmozjs.so 0x00000000000567ff f [.] js_TraceObject 0.98% firefox /usr/lib64/firefox-3.5.2/firefox 0x000000000000dd23 b [.] arena_ralloc [acme@doppio linux-2.6-tip]$ The new field is just after the symbol address. To help in figuring out symbol resolution bugs. 
Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 3 ++- tools/perf/util/symbol.c | 57 ++++++++++++++++++++++++++++++++++++--------- tools/perf/util/symbol.h | 2 ++ 3 files changed, 50 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 84205462e07..a5e2f8df411 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -700,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) size_t ret = 0; if (verbose) - ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, + dso__symtab_origin(self->dso)); ret += repsep_fprintf(fp, "[%c] ", self->level); if (self->sym) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 16ddca20294..f1dcede1430 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -24,6 +24,16 @@ const char *sym_hist_filter; #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +enum dso_origin { + DSO__ORIG_KERNEL = 0, + DSO__ORIG_JAVA_JIT, + DSO__ORIG_FEDORA, + DSO__ORIG_UBUNTU, + DSO__ORIG_BUILDID, + DSO__ORIG_DSO, + DSO__ORIG_NOT_FOUND, +}; + static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, u64 obj_start, int verbose) @@ -81,6 +91,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; self->slen_calculated = 0; + self->origin = DSO__ORIG_NOT_FOUND; } return self; @@ -710,7 +721,7 @@ static char *dso__read_build_id(struct dso *self, int verbose) ++raw; bid += 2; } - if (verbose) + if (verbose >= 2) printf("%s(%s): %s\n", __func__, self->name, build_id); out_elf_end: elf_end(elf); @@ -720,11 +731,26 @@ out: return build_id; } +char dso__symtab_origin(const struct dso *self) +{ + static const char origin[] = { + [DSO__ORIG_KERNEL] = 'k', + [DSO__ORIG_JAVA_JIT] = 'j', + [DSO__ORIG_FEDORA] = 'f', + [DSO__ORIG_UBUNTU] = 'u', + [DSO__ORIG_BUILDID] = 'b', + [DSO__ORIG_DSO] = 'd', + }; + + if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) + return '!'; + return origin[self->origin]; +} + int dso__load(struct dso *self, symbol_filter_t filter, int verbose) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; - int variant = 0; int ret = -1; int fd; @@ -733,19 +759,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) self->adjust_symbols = 0; - if (strncmp(self->name, "/tmp/perf-", 10) == 0) - return dso__load_perf_map(self, filter, verbose); + if (strncmp(self->name, "/tmp/perf-", 10) == 0) { + ret = dso__load_perf_map(self, filter, verbose); + self->origin = ret > 0 ? 
DSO__ORIG_JAVA_JIT : + DSO__ORIG_NOT_FOUND; + return ret; + } + + self->origin = DSO__ORIG_FEDORA - 1; more: do { - switch (variant) { - case 0: /* Fedora */ + self->origin++; + switch (self->origin) { + case DSO__ORIG_FEDORA: snprintf(name, size, "/usr/lib/debug%s.debug", self->name); break; - case 1: /* Ubuntu */ + case DSO__ORIG_UBUNTU: snprintf(name, size, "/usr/lib/debug%s", self->name); break; - case 2: + case DSO__ORIG_BUILDID: build_id = dso__read_build_id(self, verbose); if (build_id != NULL) { snprintf(name, size, @@ -754,16 +787,15 @@ more: free(build_id); break; } - variant++; + self->origin++; /* Fall thru */ - case 3: /* Sane people */ + case DSO__ORIG_DSO: snprintf(name, size, "%s", self->name); break; default: goto out; } - variant++; fd = open(name, O_RDONLY); } while (fd < 0); @@ -899,6 +931,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); + if (err > 0) + self->origin = DSO__ORIG_KERNEL; + return err; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2f92b21c712..1e003ec2f4b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -26,6 +26,7 @@ struct dso { unsigned int sym_priv_size; unsigned char adjust_symbols; unsigned char slen_calculated; + unsigned char origin; char name[0]; }; @@ -49,6 +50,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); size_t dso__fprintf(struct dso *self, FILE *fp); +char dso__symtab_origin(const struct dso *self); void symbol__init(void); #endif /* _PERF_SYMBOL_ */ -- cgit v1.2.3-70-g09d2 From b26bc5a7f81474937e427b0c855eabee5ad56f89 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 7 Aug 2009 10:18:39 +0200 Subject: perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale We want to use a coherent flag for -S/--stat across all tools, so free up -S in perf stat. Signed-off-by: Brice Goglin Cc: Peter Zijlstra Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 0d74346d21a..484080dd5b6 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --S:: +-c:: scale counter values EXAMPLES diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f9510eeeb6c..b4b06c7903e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -496,7 +496,7 @@ static const struct option options[] = { "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, + OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), -- cgit v1.2.3-70-g09d2 From ae07b63f4b6728e1f98aa5c5416cfc1280f59f51 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2009 16:48:54 +0200 Subject: perf list: Fix the output to not include tracepoints without an id Stop perf list from displaying tracepoints without an id file, those are special tracepoints that are not interfaced to perfcounters so listing them is erroneous and passing them as events will produce no output. 
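(The id file checked for here is the piece of debugfs metadata that ties a tracepoint to the counter subsystem: its contents are the numeric event id that is passed in the counter attributes. A rough standalone sketch of reading it, assuming debugfs is mounted at /sys/kernel/debug; the helper name is made up.)

#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Return the tracepoint id of sys:event, or -1 if it has no id file. */
static int tracepoint_id(const char *sys, const char *event)
{
	char path[PATH_MAX], buf[32];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/tracing/events/%s/%s/id", sys, event);

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return -1;

	buf[n] = '\0';
	return atoi(buf);
}
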
Signed-off-by: Peter Zijlstra Acked-by: Jason Baron Cc: Steven Rostedt Cc: Chris Mason Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f77407b5832..4858d83b3b6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = { (strcmp(sys_dirent.d_name, ".")) && \ (strcmp(sys_dirent.d_name, ".."))) +static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) +{ + char evt_path[MAXPATHLEN]; + int fd; + + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, + sys_dir->d_name, evt_dir->d_name); + fd = open(evt_path, O_RDONLY); + if (fd < 0) + return -EINVAL; + close(fd); + + return 0; +} + #define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ sys_dirent.d_name, evt_dirent.d_name) && \ (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, ".."))) + (strcmp(evt_dirent.d_name, "..")) && \ + (!tp_event_has_id(&sys_dirent, &evt_dirent))) #define MAX_EVENT_LENGTH 30 -- cgit v1.2.3-70-g09d2 From 7eac7e9e726c1b136bd7e0ad6671ce315f48bb18 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Fri, 7 Aug 2009 14:16:00 +0200 Subject: perf util: Fix do_read() to fail on EOF instead of busy-looping While toying with perf, I've noticed that perf record can easily enter a busy loop when doing something as silly as: $ perf record -A ls Yeah, do_read here really wants to read a known size, not being able to should die(), not busy-loop ;) That was the cause for the bug. Signed-off-by: Pierre Habouzit Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/util/header.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 95a44bcfc2d..b92a457ca32 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size) if (ret < 0) die("failed to read"); + if (ret == 0) + die("failed to read: missing data"); size -= ret; buf += ret; -- cgit v1.2.3-70-g09d2 From 266e0e219888420a1a7cafc82e82891cf7b5a979 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Fri, 7 Aug 2009 14:16:01 +0200 Subject: perf record: Fix the -A UI for empty or non-existent perf.data 1. Ignore the -A argument if there is no perf.data file 2. Treat an empty file like a non existent file. Else, perf will try to read the perf.data header, and fail with an error. Treating an empty file like a non-existent file makes sense, since an interupted (as in SIGKILLed) perf could leave such files around, and you don't want to annoy the user with errors for files with no data in it. 
Signed-off-by: Pierre Habouzit Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 90c98082af1..0345aad8eba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -525,10 +525,14 @@ static int __cmd_record(int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); - if (!stat(output_name, &st) && !force && !append_file) { - fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", - output_name); - exit(-1); + if (!stat(output_name, &st) && st.st_size) { + if (!force && !append_file) { + fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", + output_name); + exit(-1); + } + } else { + append_file = 0; } flags = O_CREAT|O_RDWR; -- cgit v1.2.3-70-g09d2 From b0efe213f84f7fd5ccfe07053e3d9fb827b7c188 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 02:16:23 +0200 Subject: perf tools: callchain: Fix spurious 'perf report' warnings: ignore empty callchains When the callchain tree comes to insert an empty backtrace, it raises a spurious warning about the fact we are inserting an empty. This is spurious because the radix tree assumes it did something wrong to reach this state. But it didn't, we just met an empty callchain that has to be ignored. This happens occasionally with certain types of call-chain recordings. If it happens it's a big nuisance as perf report output starts with thousands of warning lines. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249690585-9145-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 98c5627f327..a8e67aa9ef4 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -336,5 +336,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { + if (!chain->nr) + return; __append_chain_children(root, chain, syms, 0); } -- cgit v1.2.3-70-g09d2 From b1a88349c37624755b28ac3b3152b48f52c1f487 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 02:16:24 +0200 Subject: perf tools: callchain: Fix 'perf report' display to be callchain by default If we recorded with -g option to record the callchain, right now we require a -g option to perf report as well - and people reported this as unnecessary complication: the user already specified -g once, no need to require it a second time. So if the recording includes call-chains, display the callchain by default from perf report. ( The user can override this default using "-g none" option from perf report. 
) Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249690585-9145-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 16 +++++++++++++++- tools/perf/util/callchain.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a5e2f8df411..c4a8e108e52 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -68,7 +68,7 @@ static int callchain; static struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_ABS, + .mode = CHAIN_GRAPH_REL, .min_percent = 0.5 }; @@ -1836,6 +1836,13 @@ static int __cmd_report(void) " -g?\n"); exit(-1); } + } else if (callchain_param.mode != CHAIN_NONE && !callchain) { + callchain = 1; + if (register_callchain_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain" + " params\n"); + exit(-1); + } } if (load_kernel() < 0) { @@ -1974,6 +1981,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, else if (!strncmp(tok, "fractal", strlen(arg))) callchain_param.mode = CHAIN_GRAPH_REL; + else if (!strncmp(tok, "none", strlen(arg))) { + callchain_param.mode = CHAIN_NONE; + callchain = 0; + + return 0; + } + else return -1; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b2d128e07c8..a926ae4f5a1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,6 +7,7 @@ #include "symbol.h" enum chain_mode { + CHAIN_NONE, CHAIN_FLAT, CHAIN_GRAPH_ABS, CHAIN_GRAPH_REL -- cgit v1.2.3-70-g09d2 From 25446036cbfc2c89faacdb4fb4603943d2197dc6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 02:16:25 +0200 Subject: perf tools: callchain: Fix sum of percentages to be 100% by displaying amount of ignored chains in fractal mode When we filter the callchains below a given percentage, we ignore them and the end result only shows entries that have an upper percentage than the filter threshold. It seems to users then that we have an imbalance in the percentage, as if the sum inside a profiled branch doesn't reach 100%. Since in the past there have been real perf report bugs that showed the same sypmtom, it would be nice to assure the user that the data is perfect and trustable and it all sums up to 100.00%. So fix this by displaying the remaining hits that have been filtered but without more detail than their amount in each branches. Example while filtering below 50%: 7.73% [k] delay_tsc | |--98.22%-- __const_udelay | | | |--86.37%-- ath5k_hw_register_timeout | | ath5k_hw_noise_floor_calibration | | ath5k_hw_reset | | ath5k_reset | | ath5k_config | | ieee80211_hw_config | | | | | |--88.53%-- ieee80211_scan_work | | | worker_thread | | | kthread | | | child_rip | | --11.47%-- [...] | --13.63%-- [...] --1.78%-- [...] 
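(The bookkeeping behind those [...] lines is small: every child's hits are accounted against the parent's total, and whatever share did not make it past the filter is printed once as a remainder, so the branch still sums to 100%. A simplified sketch, not the actual builtin-report.c code:)

#include <stdio.h>

/* Illustrative only: child_hits[] stands in for the callchain rb-tree. */
static void print_branch(unsigned long long total,
			 const unsigned long long *child_hits, int nr,
			 double min_percent)
{
	unsigned long long remaining = total;
	int i;

	for (i = 0; i < nr; i++) {
		double pcnt = 100.0 * child_hits[i] / total;

		if (pcnt < min_percent)
			continue;               /* folded into [...] below */

		remaining -= child_hits[i];
		printf("|--%.2f%%-- child %d\n", pcnt, i);
	}

	if (remaining && remaining != total)
		printf(" --%.2f%%-- [...]\n", 100.0 * remaining / total);
}
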
Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249690585-9145-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 47 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c4a8e108e52..99274cec0ad 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -891,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, return ret; } +static struct symbol *rem_sq_bracket; +static struct callchain_list rem_hits; + +static void init_rem_hits(void) +{ + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); + if (!rem_sq_bracket) { + fprintf(stderr, "Not enough memory to display remaining hits\n"); + return; + } + + strcpy(rem_sq_bracket->name, "[...]"); + rem_hits.sym = rem_sq_bracket; +} + static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self, u64 total_samples, int depth, int depth_mask) @@ -900,6 +915,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, struct callchain_list *chain; int new_depth_mask = depth_mask; u64 new_total; + u64 remaining; size_t ret = 0; int i; @@ -908,17 +924,25 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, else new_total = total_samples; + remaining = new_total; + node = rb_first(&self->rb_root); while (node) { + u64 cumul; + child = rb_entry(node, struct callchain_node, rb_node); + cumul = cumul_hits(child); + remaining -= cumul; /* * The depth mask manages the output of pipes that show * the depth. We don't want to keep the pipes of the current - * level for the last child of this depth + * level for the last child of this depth. + * Except if we have remaining filtered hits. They will + * supersede the last child */ next = rb_next(node); - if (!next) + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) new_depth_mask &= ~(1 << (depth - 1)); /* @@ -933,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - cumul_hits(child)); + cumul); } ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, @@ -941,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, node = next; } + if (callchain_param.mode == CHAIN_GRAPH_REL && + remaining && remaining != new_total) { + + if (!rem_sq_bracket) + return ret; + + new_depth_mask &= ~(1 << (depth - 1)); + + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + new_depth_mask, 0, new_total, + remaining); + } + return ret; } @@ -1361,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) unsigned int width; char *col_width = col_width_list_str; + init_rem_hits(); + fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); fprintf(fp, "#\n"); @@ -1432,6 +1471,8 @@ print_entries: } fprintf(fp, "\n"); + free(rem_sq_bracket); + return ret; } -- cgit v1.2.3-70-g09d2 From c24b513337f06712b8c81eb4f1413d44591dfee7 Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Wed, 5 Aug 2009 20:53:34 +0200 Subject: perf: "Longum est iter per praecepta, breve et efficax per exempla" A few examples of how 'perf' can be used, from an e-mail by Ingo Molnar http://lkml.org/lkml/2009/8/4/346. Signed-off-by: Carlos R. 
Mafra Cc: Peter Zijlstra Cc: Valdis.Kletnieks@vt.edu LKML-Reference: <20090805185334.GA4535@Pilar.aei.mpg.de> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-examples.txt | 225 +++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 tools/perf/Documentation/perf-examples.txt (limited to 'tools') diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt new file mode 100644 index 00000000000..8eb6c489fb1 --- /dev/null +++ b/tools/perf/Documentation/perf-examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... 
+ # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] 
+ 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + -- cgit v1.2.3-70-g09d2 From 183f3b0887083d36c8a25cd5e3518906415d1889 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Sat, 8 Aug 2009 14:14:15 +0200 Subject: perf_counter tools: Fix libbfd detection for systems with libz dependency Due to a libz dependency in some distro's binutils package, C++ demangle support isn't compiled in despite the necessary libraries being available. Fix this by adding a -lz link test to the dependency detection rules. Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra LKML-Reference: <1249733655.6929.5.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1916e44b9bb..60411e94113 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -387,10 +387,14 @@ else has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + ifeq ($(has_bfd),y) EXTLIBS += -lbfd else ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty + else ifeq ($(has_bfd_iberty_z),y) + EXTLIBS += -lbfd -liberty -lz else msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) BASIC_CFLAGS += -DNO_DEMANGLE -- cgit v1.2.3-70-g09d2 From c0a8865e32c8d1a562db38e06ef31ef23282f646 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 9 Aug 2009 04:19:15 +0200 Subject: perf tools: callchain: Fix bad rounding of minimum rate Sometimes we get callchain branches that have a rate under the limit given by the user. Say you launched: perf record -f -g -a ./hackbench 10 perf report -g fractal,10.0 And you got: 2.33% hackbench [kernel] [k] _spin_lock_irqsave | |--78.57%-- remove_wait_queue | poll_freewait | do_sys_poll | sys_poll | sysenter_dispatch | 0xf7ffa430 | 0x1ffadea3c | |--7.14%-- __up_read | up_read | do_page_fault | page_fault | 0xf7ffa430 | 0xa0df710000000a ... It is abnormal to get a 7.14% branch whereas we passed a 10% filter. The problem is that we round down the minimum threshold. This happens mostly when we have very low number of events. If the total amount of your branch is 4 and you have a subranch of 3 events, filtering to 90% will be computed like follows: limit = 4 * 0.9; The result is about 3.6, but the cast to integer will round down to 3. It means that our filter is actually of 75% We must then explicitly round up the minimum threshold. 
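(The four-sample example from above, spelled out: truncating towards zero turns a 90% request into a 75% one, while rounding up preserves it. A tiny self-contained check, link with -lm for ceil():)

#include <math.h>
#include <stdio.h>

int main(void)
{
	unsigned long long hits = 4;      /* events in the branch */
	double min_percent = 90.0;        /* requested filter     */

	unsigned long long floored = hits * min_percent / 100.0;       /* 3 */
	unsigned long long rounded = ceil(hits * min_percent / 100.0); /* 4 */

	printf("truncated threshold: %llu (a 3-hit child passes: 75%%)\n",
	       floored);
	printf("rounded-up threshold: %llu (only >= 90%% passes)\n",
	       rounded);
	return 0;
}
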
Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: acme@redhat.com Cc: peterz@infradead.org Cc: efault@gmx.de LKML-Reference: <20090809024235.GA10146@nowhere> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a8e67aa9ef4..01147341164 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "callchain.h" @@ -112,7 +113,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node, u64 min_hit; node->rb_root = RB_ROOT; - min_hit = node->children_hit * min_percent / 100.0; + min_hit = ceil(node->children_hit * min_percent); chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); @@ -126,7 +127,7 @@ static void sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph_rel(chain_root, param->min_percent); + __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); rb_root->rb_node = chain_root->rb_root.rb_node; } -- cgit v1.2.3-70-g09d2 From 2a8083f063472f27c253545dd64e1a7bbbb1ab61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Aug 2009 16:22:00 -0300 Subject: perf record: Fix .tid and .pid fill-in when synthesizing events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noticed when trying to record events for a firefox thread. We were synthesizing both .tid and .pid with the pid passed via --pid. Fix it by reading /proc/PID/status and getting the tgid to use in .pid, .tid gets the specified "pid". Signed-off-by: Arnaldo Carvalho de Melo Cc: "H. Peter Anvin" Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20090811192200.GF18061@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 71 ++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0345aad8eba..30b83def03d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -203,46 +203,48 @@ static void sig_atexit(void) kill(getpid(), signr); } -static void pid_synthesize_comm_event(pid_t pid, int full) +static pid_t pid_synthesize_comm_event(pid_t pid, int full) { struct comm_event comm_ev; char filename[PATH_MAX]; char bf[BUFSIZ]; - int fd; - size_t size; - char *field, *sep; + FILE *fp; + size_t size = 0; DIR *tasks; struct dirent dirent, *next; + pid_t tgid = 0; - snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - fd = open(filename, O_RDONLY); - if (fd < 0) { + fp = fopen(filename, "r"); + if (fd == NULL) { /* * We raced with a task exiting - just return: */ if (verbose) fprintf(stderr, "couldn't open %s\n", filename); - return; + return 0; } - if (read(fd, bf, sizeof(bf)) < 0) { - fprintf(stderr, "couldn't read %s\n", filename); - exit(EXIT_FAILURE); - } - close(fd); - /* 9027 (cat) R 6747 9027 6747 34816 9027 ... 
*/ memset(&comm_ev, 0, sizeof(comm_ev)); - field = strchr(bf, '('); - if (field == NULL) - goto out_failure; - sep = strchr(++field, ')'); - if (sep == NULL) - goto out_failure; - size = sep - field; - memcpy(comm_ev.comm, field, size++); - - comm_ev.pid = pid; + while (!comm_ev.comm[0] || !comm_ev.pid) { + if (fgets(bf, sizeof(bf), fp) == NULL) + goto out_failure; + + if (memcmp(bf, "Name:", 5) == 0) { + char *name = bf + 5; + while (*name && isspace(*name)) + ++name; + size = strlen(name) - 1; + memcpy(comm_ev.comm, name, size++); + } else if (memcmp(bf, "Tgid:", 5) == 0) { + char *tgids = bf + 5; + while (*tgids && isspace(*tgids)) + ++tgids; + tgid = comm_ev.pid = atoi(tgids); + } + } + comm_ev.header.type = PERF_EVENT_COMM; size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); @@ -251,7 +253,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) comm_ev.tid = pid; write_output(&comm_ev, comm_ev.header.size); - return; + goto out_fclose; } snprintf(filename, sizeof(filename), "/proc/%d/task", pid); @@ -268,7 +270,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full) write_output(&comm_ev, comm_ev.header.size); } closedir(tasks); - return; + +out_fclose: + fclose(fp); + return tgid; out_failure: fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", @@ -276,7 +281,7 @@ out_failure: exit(EXIT_FAILURE); } -static void pid_synthesize_mmap_samples(pid_t pid) +static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) { char filename[PATH_MAX]; FILE *fp; @@ -328,7 +333,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) mmap_ev.len -= mmap_ev.start; mmap_ev.header.size = (sizeof(mmap_ev) - (sizeof(mmap_ev.filename) - size)); - mmap_ev.pid = pid; + mmap_ev.pid = tgid; mmap_ev.tid = pid; write_output(&mmap_ev, mmap_ev.header.size); @@ -347,14 +352,14 @@ static void synthesize_all(void) while (!readdir_r(proc, &dirent, &next) && next) { char *end; - pid_t pid; + pid_t pid, tgid; pid = strtol(dirent.d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; - pid_synthesize_comm_event(pid, 1); - pid_synthesize_mmap_samples(pid); + tgid = pid_synthesize_comm_event(pid, 1); + pid_synthesize_mmap_samples(pid, tgid); } closedir(proc); @@ -567,8 +572,8 @@ static int __cmd_record(int argc, const char **argv) perf_header__write(header, output); if (!system_wide) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); + pid_t tgid = pid_synthesize_comm_event(pid, 0); + pid_synthesize_mmap_samples(pid, tgid); } else synthesize_all(); -- cgit v1.2.3-70-g09d2 From 94a24752fe95ca1e7f98b197052d44e6a207740d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Aug 2009 16:21:38 -0300 Subject: perf report: Show the tid too in -D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This made it easier to find the firefox threading related bug. Signed-off-by: Arnaldo Carvalho de Melo Cc: "H. 
Peter Anvin" Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Frédéric Weisbecker LKML-Reference: <20090811192138.GE18061@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 99274cec0ad..23e1457f240 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, - event->ip.pid, + event->ip.pid, event->ip.tid, (void *)(long)ip, (long long)period); @@ -1612,10 +1612,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread = threads__findnew(event->mmap.pid); struct map *map = map__new(&event->mmap); - dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, + event->mmap.tid, (void *)(long)event->mmap.start, (void *)(long)event->mmap.len, (void *)(long)event->mmap.pgoff, -- cgit v1.2.3-70-g09d2 From 247648e3742ded01e42a4b14c2da330b13cbb47f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Aug 2009 16:22:11 -0300 Subject: perf tools: Fix fallback to cplus_demangle() when bfd_demangle() is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In old binutils we can't access bfd_demangle(), use cplus_demangle() just like oprofile. Signed-off-by: Arnaldo Carvalho de Melo Cc: Luis Claudio R. Gonçalves Cc: H. 
Peter Anvin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Frédéric Weisbecker LKML-Reference: <20090811192211.GG18061@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 29 ++++++++++++++++++----------- tools/perf/util/symbol.c | 15 --------------- tools/perf/util/symbol.h | 24 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 60411e94113..c045b4271e5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -382,22 +382,29 @@ endif ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") - - has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") - ifeq ($(has_bfd),y) EXTLIBS += -lbfd - else ifeq ($(has_bfd_iberty),y) - EXTLIBS += -lbfd -liberty - else ifeq ($(has_bfd_iberty_z),y) - EXTLIBS += -lbfd -liberty -lz else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) - BASIC_CFLAGS += -DNO_DEMANGLE + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") + ifeq ($(has_bfd_iberty),y) + EXTLIBS += -lbfd -liberty + else + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + ifeq ($(has_bfd_iberty_z),y) + EXTLIBS += -lbfd -liberty -lz + else + has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y") + ifeq ($(has_cplus_demangle),y) + EXTLIBS += -liberty + BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE + else + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) + BASIC_CFLAGS += -DNO_DEMANGLE + endif + endif + endif endif endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f1dcede1430..2473fd427be 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -7,23 +7,8 @@ #include #include -#ifndef NO_DEMANGLE -#include -#else -static inline -char *bfd_demangle(void __used *v, const char __used *c, int __used i) -{ - return NULL; -} -#endif - const char *sym_hist_filter; -#ifndef DMGL_PARAMS -#define DMGL_PARAMS (1 << 0) /* Include function args */ -#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ -#endif - enum dso_origin { DSO__ORIG_KERNEL = 0, DSO__ORIG_JAVA_JIT, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1e003ec2f4b..b53bf0125c1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -7,6 +7,30 @@ #include #include "module.h" +#ifdef HAVE_CPLUS_DEMANGLE +extern char *cplus_demangle(const char *, int); + +static inline char 
*bfd_demangle(void __used *v, const char *c, int i) +{ + return cplus_demangle(c, i); +} +#else +#ifdef NO_DEMANGLE +static inline char *bfd_demangle(void __used *v, const char __used *c, + int __used i) +{ + return NULL; +} +#else +#include +#endif +#endif + +#ifndef DMGL_PARAMS +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#endif + struct symbol { struct rb_node rb_node; u64 start; -- cgit v1.2.3-70-g09d2 From 1340e6bbaff7ff7f6f75eb4a5c34933efce84a84 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Aug 2009 17:04:36 -0300 Subject: perf tools: Fix dso__new handle() to handle deleted DSOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is better than showing the map addr, this way at least we know that we can't get the symtabs because the DSO was deleted (system update) while an app still used such DSO. Yeah, don't do that, but if you do, you'll figure it out quicker this way. [acme@doppio linux-2.6-tip]$ perf report | head -15 # Samples: 3796 # # Overhead Command Shared Object Symbol # ........ ....... ................................................................... ...... # 23.55% pidgin /lib64/libglib-2.0.so.0.2000.4.#prelink#.Pd98lu (deleted) [.] 0x00000000038844 21.55% pidgin /lib64/libpthread-2.10.1.so.#prelink#.AFwK8Q (deleted) [.] 0x0000000000a42d 10.85% pidgin [kernel] [.] vread_hpet 7.85% pidgin /lib64/libgobject-2.0.so.0.2000.4.#prelink#.o1vpU7 (deleted) [.] 0x00000000014de8 3.35% pidgin /lib64/libc-2.10.1.so (deleted) [.] 0x0000000007a875 3.19% pidgin /lib64/libdbus-1.so.3.4.0.#prelink#.6mwgZP (deleted) [.] 0x0000000001d254 3.06% pidgin /usr/lib64/libgtk-x11-2.0.so.0.1600.5.#prelink#.511hAl (deleted) [.] 0x000000002334e7 2.90% pidgin /usr/lib64/libgdk-x11-2.0.so.0.1600.5.#prelink#.5qlMo1 (deleted) [.] 0x00000000037b2d 1.84% pidgin [kernel] [k] do_sys_poll 1.45% pidgin /usr/lib64/libX11.so.6.2.0.#prelink#.iR59Rx (deleted) [.] 0x0000000004c751 [acme@doppio linux-2.6-tip]$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Luis Claudio R. Gonçalves Cc: Clark Williams Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Frédéric Weisbecker LKML-Reference: <20090811200436.GA3478@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 2473fd427be..5c0f42e6b33 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -801,6 +801,8 @@ more: } out: free(name); + if (ret < 0 && strstr(self->name, " (deleted)") != NULL) + return 0; return ret; } -- cgit v1.2.3-70-g09d2 From 0a5ac84650fb7a7f226814103d95724e34b012ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Aug 2009 11:18:01 +0200 Subject: perf record: Add missing -C option support for specifying profile cpu perf top supports a -C for setting the profile CPU, but perf record does not. This adds the same option for perf record, allowing the user to specify a specific target profile CPU. 
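For example (a sketch only; the CPU number and workload name are made up for illustration, and only the -C/"profile_cpu" option itself comes from this patch):
  ./perf record -C 1 ./my_workload     # restrict sampling to CPU 1
  ./perf record -C 999 ./my_workload   # an out-of-range CPU now fails with the "No such device" hint added below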
Signed-off-by: Jens Axboe Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20090812091801.GC12579@kernel.dk> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 30b83def03d..de76008fbaa 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -35,6 +35,7 @@ static const char *output_name = "perf.data"; static int group = 0; static unsigned int realtime_prio = 0; static int system_wide = 0; +static int profile_cpu = -1; static pid_t target_pid = -1; static int inherit = 1; static int force = 0; @@ -431,6 +432,8 @@ try_again: if (err == EPERM) die("Permission error - are you root?\n"); + else if (err == ENODEV && profile_cpu != -1) + die("No such device - did you specify an out-of-range profile CPU?\n"); /* * If it's cycles then fall back to hrtimer @@ -564,9 +567,15 @@ static int __cmd_record(int argc, const char **argv) if (pid == -1) pid = getpid(); - open_counters(-1, pid); - } else for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); + open_counters(profile_cpu, pid); + } else { + if (profile_cpu != -1) { + open_counters(profile_cpu, target_pid); + } else { + for (i = 0; i < nr_cpus; i++) + open_counters(i, target_pid); + } + } if (file_new) perf_header__write(header, output); @@ -645,6 +654,8 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_BOOLEAN('A', "append", &append_file, "append to the output file to do incremental profiling"), + OPT_INTEGER('C', "profile_cpu", &profile_cpu, + "CPU to profile on"), OPT_BOOLEAN('f', "force", &force, "overwrite existing data file"), OPT_LONG('c', "count", &default_interval, -- cgit v1.2.3-70-g09d2 From 8f7a0dc51674ad0dd06155291b0aed60d655943c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2009 14:44:59 -0300 Subject: perf list: Fix large list output by using the pager When /sys/kernel/debug is mounted the list can be immense, so use the pager like the other tools. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: <20090812174459.GB3495@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-list.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index f990fa8a35c..d88c6961274 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -10,11 +10,12 @@ #include "perf.h" -#include "util/parse-options.h" #include "util/parse-events.h" +#include "util/cache.h" int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) { + setup_pager(); print_events(); return 0; } -- cgit v1.2.3-70-g09d2 From 3a9f131fb00b8ac5950a11ad1599e45edfb5ae44 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 Aug 2009 10:27:18 +0200 Subject: perf tools: Add a per tracepoint counter attribute to get raw sample Add a new flag field while opening a tracepoint perf counter: -e tracepoint_subsystem:tracepoint_name:flags This is intended to be generic although for now it only supports the r[e[c[o[r[d]]]]] flag: ./perf record -e workqueue:workqueue_insertion:record ./perf record -e workqueue:workqueue_insertion:r will have the same effect: enabling the raw samples record for the given tracepoint counter.
In the future, we may want to support further flags, separated by commas. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250152039-7284-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 2 +- tools/perf/util/parse-events.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index de76008fbaa..78adc47da86 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -398,7 +398,7 @@ static void create_counter(int counter, int cpu, pid_t pid) PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4858d83b3b6..04417840878 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp, struct perf_counter_attr *attr) { const char *evt_name; + char *flags; char sys_name[MAX_EVENT_LENGTH]; char id_buf[4]; int fd; @@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp, strncpy(sys_name, *strp, sys_length); sys_name[sys_length] = '\0'; evt_name = evt_name + 1; + + flags = strchr(evt_name, ':'); + if (flags) { + *flags = '\0'; + flags++; + if (!strncmp(flags, "record", strlen(flags))) + attr->sample_type |= PERF_SAMPLE_RAW; + } + evt_length = strlen(evt_name); if (evt_length >= MAX_EVENT_LENGTH) return 0; -- cgit v1.2.3-70-g09d2 From daac07b2e6b77f1bd44104aa2f0593e5505f27d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 Aug 2009 10:27:19 +0200 Subject: perf tools: Add a general option to enable raw sample records While we can enable the perf sample records per tracepoint counter, we may also want to enable this option for every tracepoint counter we open, so that we don't need to add a :record flag for all of them. Add the -R, --raw-samples option for this purpose.
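Taken together with the previous patch, there are now two ways to ask for raw samples; a short sketch (the tracepoint names are examples only and assume those trace events exist on the running kernel):
  ./perf record -e workqueue:workqueue_insertion:record sleep 1                           # raw samples for this one tracepoint
  ./perf record -R -e workqueue:workqueue_insertion -e irq:irq_handler_entry sleep 1      # raw samples for every opened counter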
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1250152039-7284-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 78adc47da86..3d051b9cf25 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -34,6 +34,7 @@ static int output; static const char *output_name = "perf.data"; static int group = 0; static unsigned int realtime_prio = 0; +static int raw_samples = 0; static int system_wide = 0; static int profile_cpu = -1; static pid_t target_pid = -1; @@ -418,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + if (raw_samples) + attr->sample_type |= PERF_SAMPLE_RAW; attr->mmap = track; attr->comm = track; @@ -650,6 +653,8 @@ static const struct option options[] = { "record events on existing pid"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), + OPT_BOOLEAN('R', "raw-samples", &raw_samples, + "collect raw sample records from all opened counters"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_BOOLEAN('A', "append", &append_file, -- cgit v1.2.3-70-g09d2 From 8fd101f20bdf771949a8f3a5a779877d09b2fb56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Aug 2009 18:19:57 -0300 Subject: perf report: Don't show unresolved DSOs and symbols when -S/-d is used We're interested in just those symbols/DSOs, so filter out the unresolved ones. Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20090812211957.GE3495@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 23e1457f240..b53a60fc12d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (show & show_mask) { struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) + if (dso_list && (!dso || !dso->name || + !strlist__has_entry(dso_list, dso->name))) return 0; - if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name))) return 0; if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { -- cgit v1.2.3-70-g09d2
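A usage sketch of the filtering this fixes (the DSO path and symbol name are illustrative only):
  ./perf report -d /lib64/libc-2.10.1.so   # keep only samples that resolve to this DSO
  ./perf report -S vread_hpet              # keep only samples that resolve to this symbol
With this change, samples whose DSO or symbol could not be resolved are dropped from the filtered output instead of showing up as unresolved entries.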