From 52d422de22b26d96bbdfbc605eb31c2994df6d0b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 10 Jul 2009 22:47:28 -0300 Subject: perf report: Adjust column width to the values sampled Auto-adjust column width of perf report output to the longest occuring string length. Example: [acme@doppio pahole]$ perf report --sort comm,dso,symbol | head -13 12.79% pahole /usr/lib64/libdw-0.141.so [.] __libdw_find_attr 8.90% pahole /lib64/libc-2.10.1.so [.] _int_malloc 8.68% pahole /usr/lib64/libdw-0.141.so [.] __libdw_form_val_len 8.15% pahole /lib64/libc-2.10.1.so [.] __GI_strcmp 6.80% pahole /lib64/libc-2.10.1.so [.] __tsearch 5.54% pahole ./build/libdwarves.so.1.0.0 [.] tag__recode_dwarf_type [acme@doppio pahole]$ [acme@doppio pahole]$ perf report --sort comm,dso,symbol -d /lib64/libc-2.10.1.so | head -10 21.92% pahole /lib64/libc-2.10.1.so [.] _int_malloc 20.08% pahole /lib64/libc-2.10.1.so [.] __GI_strcmp 16.75% pahole /lib64/libc-2.10.1.so [.] __tsearch [acme@doppio pahole]$ Also add these extra options to control the new behaviour: -w, --field-width Force each column width to the provided list, for large terminal readability. -t, --field-separator: Use a special separator character and don't pad with spaces, replacing all occurances of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090711014728.GH3452@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8aa3f8c8870..05774dfbd14 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -33,6 +33,18 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. +-w:: +--field-width=:: + Force each column width to the provided list, for large terminal + readability. + +-t:: +--field-separator=:: + + Use a special separator character and don't pad with spaces, replacing + all occurances of this separator in symbol names (and other output) + with a '.' character, that thus it's the only non valid separator. + SEE ALSO -------- linkperf:perf-stat[1] -- cgit v1.2.3-70-g09d2 From e3d7e183dc276df2fcaf02af173a49ad119ba9f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 11 Jul 2009 12:18:37 -0300 Subject: perf report: Introduce -n/--show-nr-samples [acme@doppio pahole]$ perf report -ns comm,dso,symbol -d /lib64/libc-2.10.1.so -C pahole | head -17 21.94% 32101 [.] _int_malloc 20.10% 29402 [.] __GI_strcmp 16.77% 24533 [.] __tsearch 12.61% 18450 [.] malloc_consolidate 6.42% 9394 [.] _int_free 6.28% 9191 [.] __tfind 4.56% 6678 [.] __GI___libc_free 4.46% 6520 [.] _IO_vfprintf_internal 2.59% 3786 [.] __malloc 1.17% 1716 [.] __GI_memcpy [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo LKML-Reference: <1247325517-12272-5-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-report.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 05774dfbd14..e72e9311078 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,6 +24,9 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. +-n +--show-nr-samples + Show the number of samples for each symbol -C:: --comms=:: Only consider symbols in these comms. CSV that understands diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f3422121d85..430a195b858 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -51,6 +51,7 @@ static int verbose; static int modules; static int full_paths; +static int show_nr_samples; static unsigned long page_size; static unsigned long mmap_window = 32; @@ -1024,6 +1025,13 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) else ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count); + if (show_nr_samples) { + if (field_sep) + fprintf(fp, "%c%lld", *field_sep, self->count); + else + fprintf(fp, "%11lld", self->count); + } + list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) continue; @@ -1361,6 +1369,12 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); + if (show_nr_samples) { + if (field_sep) + fprintf(fp, "%cSamples", *field_sep); + else + fputs(" Samples ", fp); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) continue; @@ -1388,6 +1402,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) goto print_entries; fprintf(fp, "# ........"); + if (show_nr_samples) + fprintf(fp, " .........."); list_for_each_entry(se, &hist_entry__sort_list, list) { unsigned int i; @@ -1979,6 +1995,8 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), + OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, + "Show a column with the number of samples"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, -- cgit v1.2.3-70-g09d2 From 386c0b702b1ea81c0f54f5c9832a3d4a52270f14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 5 Aug 2009 10:04:53 -0300 Subject: perf report: Add missing command line options to man page Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20090805130453.GC10688@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 60 ++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1dbc1eeb4c0..6be696b0a2b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -29,13 +29,67 @@ OPTIONS Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. -a:: - system-wide collection + System-wide collection. -l:: - scale counter values + Scale counter values. + +-p:: +--pid=:: + Record events on existing pid. + +-r:: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. +-A:: +--append:: + Append to the output file to do incremental profiling. + +-f:: +--force:: + Overwrite existing data file. + +-c:: +--count=:: + Event period to sample. + +-o:: +--output=:: + Output file name. + +-i:: +--inherit:: + Child tasks inherit counters. +-F:: +--freq=:: + Profile at this frequency. + +-m:: +--mmap-pages=:: + Number of mmap data pages. + +-g:: +--call-graph:: + Do call-graph (stack chain/backtrace) recording. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-s:: +--stat:: + Per thread counts. + +-d:: +--data:: + Sample addresses. + +-n:: +--no-samples:: + Don't sample. SEE ALSO -------- -- cgit v1.2.3-70-g09d2 From 836179834833272f89098c6d1e1b89e8e69797c2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 4 Aug 2009 10:24:41 +0200 Subject: perf top: Update man page perf_counter tools: update perf top manual page to reflect current implementation. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-top.txt | 112 ++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 13 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 539d0128972..4a7d558dc30 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -3,36 +3,122 @@ perf-top(1) NAME ---- -perf-top - Run a command and profile it +perf-top - System profiling tool. SYNOPSIS -------- [verse] -'perf top' [-e | --event=EVENT] [-l] [-a] +'perf top' [-e | --event=EVENT] [] DESCRIPTION ----------- -This command runs a command and gathers a performance counter profile -from it. +This command generates and displays a performance counter profile in realtime. OPTIONS ------- -...:: - Any command you can specify in a shell. +-a:: +--all-cpus:: + System-wide collection. (default) + +-c :: +--count=:: + Event period to sample. + +-C :: +--CPU=:: + CPU to profile. + +-d :: +--delay=:: + Number of seconds to delay between refreshes. --e:: ---event=:: +-e :: +--event=:: Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. --a:: - system-wide collection +-E :: +--entries=:: + Display this many functions. + +-f :: +--count-filter=:: + Only display functions with more events than this. + +-F :: +--freq=:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks inherit counters, only makes sens with -p option. + +-k :: +--vmlinux=:: + Path to vmlinux. Required for annotation functionality. + +-m :: +--mmap-pages=:: + Number of mmapped data pages. + +-p :: +--pid=:: + Profile events on existing pid. + +-r :: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. + +-s :: +--sym-annotate=:: + Annotate this symbol. Requires -k option. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[w]:: + Toggle between weighted sum and individual count[E]r profile. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. --l:: - scale counter values SEE ALSO -------- -- cgit v1.2.3-70-g09d2 From b26bc5a7f81474937e427b0c855eabee5ad56f89 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 7 Aug 2009 10:18:39 +0200 Subject: perf stat: Fix tool option consistency: rename -S/--scale to -c/--scale We want to use a coherent flag for -S/--stat across all tools, so free up -S in perf stat. Signed-off-by: Brice Goglin Cc: Peter Zijlstra Cc: paulus@samba.org Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 0d74346d21a..484080dd5b6 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --S:: +-c:: scale counter values EXAMPLES diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f9510eeeb6c..b4b06c7903e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -496,7 +496,7 @@ static const struct option options[] = { "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, + OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), -- cgit v1.2.3-70-g09d2 From c24b513337f06712b8c81eb4f1413d44591dfee7 Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Wed, 5 Aug 2009 20:53:34 +0200 Subject: perf: "Longum est iter per praecepta, breve et efficax per exempla" A few examples of how 'perf' can be used, from an e-mail by Ingo Molnar http://lkml.org/lkml/2009/8/4/346. Signed-off-by: Carlos R. Mafra Cc: Peter Zijlstra Cc: Valdis.Kletnieks@vt.edu LKML-Reference: <20090805185334.GA4535@Pilar.aei.mpg.de> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-examples.txt | 225 +++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 tools/perf/Documentation/perf-examples.txt (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt new file mode 100644 index 00000000000..8eb6c489fb1 --- /dev/null +++ b/tools/perf/Documentation/perf-examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + -- cgit v1.2.3-70-g09d2 From 2932cffc89e9a1476b28a59896fa4f81e0d4f131 Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Mon, 17 Aug 2009 00:36:21 +0200 Subject: perf: Rename perf-examples.txt to examples.txt Rename it to examples.txt to avoid the perf-*.txt pattern in the Makefile, otherwise 'make doc' fails because perf-examples.txt is not formatted to be a man page: ERROR: perf-examples.txt: line 1: manpage document title is mandatory Signed-off-by: Carlos R. Mafra Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Documentation/examples.txt | 225 +++++++++++++++++++++++++++++ tools/perf/Documentation/perf-examples.txt | 225 ----------------------------- 2 files changed, 225 insertions(+), 225 deletions(-) create mode 100644 tools/perf/Documentation/examples.txt delete mode 100644 tools/perf/Documentation/perf-examples.txt (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt new file mode 100644 index 00000000000..8eb6c489fb1 --- /dev/null +++ b/tools/perf/Documentation/examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt deleted file mode 100644 index 8eb6c489fb1..00000000000 --- a/tools/perf/Documentation/perf-examples.txt +++ /dev/null @@ -1,225 +0,0 @@ - - ------------------------------ - ****** perf by examples ****** - ------------------------------ - -[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] - - -First, discovery/enumeration of available counters can be done via -'perf list': - -titan:~> perf list - [...] - kmem:kmalloc [Tracepoint event] - kmem:kmem_cache_alloc [Tracepoint event] - kmem:kmalloc_node [Tracepoint event] - kmem:kmem_cache_alloc_node [Tracepoint event] - kmem:kfree [Tracepoint event] - kmem:kmem_cache_free [Tracepoint event] - kmem:mm_page_free_direct [Tracepoint event] - kmem:mm_pagevec_free [Tracepoint event] - kmem:mm_page_alloc [Tracepoint event] - kmem:mm_page_alloc_zone_locked [Tracepoint event] - kmem:mm_page_pcpu_drain [Tracepoint event] - kmem:mm_page_alloc_extfrag [Tracepoint event] - -Then any (or all) of the above event sources can be activated and -measured. For example the page alloc/free properties of a 'hackbench -run' are: - - titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc - -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 - Time: 0.575 - - Performance counter stats for './hackbench 10': - - 13857 kmem:mm_page_pcpu_drain - 27576 kmem:mm_page_alloc - 6025 kmem:mm_pagevec_free - 20934 kmem:mm_page_free_direct - - 0.613972165 seconds time elapsed - -You can observe the statistical properties as well, by using the -'repeat the workload N times' feature of perf stat: - - titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct ./hackbench 10 - Time: 0.627 - Time: 0.644 - Time: 0.564 - Time: 0.559 - Time: 0.626 - - Performance counter stats for './hackbench 10' (5 runs): - - 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) - 25035 kmem:mm_page_alloc ( +- 3.783% ) - 6104 kmem:mm_pagevec_free ( +- 0.934% ) - 18376 kmem:mm_page_free_direct ( +- 4.941% ) - - 0.643954516 seconds time elapsed ( +- 2.363% ) - -Furthermore, these tracepoints can be used to sample the workload as -well. For example the page allocations done by a 'git gc' can be -captured the following way: - - titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc - Counting objects: 1148, done. - Delta compression using up to 2 threads. - Compressing objects: 100% (450/450), done. - Writing objects: 100% (1148/1148), done. - Total 1148 (delta 690), reused 1148 (delta 690) - [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] - -To check which functions generated page allocations: - - titan:~/git> perf report - # Samples: 10646 - # - # Overhead Command Shared Object - # ........ ............... .......................... - # - 23.57% git-repack /lib64/libc-2.5.so - 21.81% git /lib64/libc-2.5.so - 14.59% git ./git - 11.79% git-repack ./git - 7.12% git /lib64/ld-2.5.so - 3.16% git-repack /lib64/libpthread-2.5.so - 2.09% git-repack /bin/bash - 1.97% rm /lib64/libc-2.5.so - 1.39% mv /lib64/ld-2.5.so - 1.37% mv /lib64/libc-2.5.so - 1.12% git-repack /lib64/ld-2.5.so - 0.95% rm /lib64/ld-2.5.so - 0.90% git-update-serv /lib64/libc-2.5.so - 0.73% git-update-serv /lib64/ld-2.5.so - 0.68% perf /lib64/libpthread-2.5.so - 0.64% git-repack /usr/lib64/libz.so.1.2.3 - -Or to see it on a more finegrained level: - -titan:~/git> perf report --sort comm,dso,symbol -# Samples: 10646 -# -# Overhead Command Shared Object Symbol -# ........ ............... .......................... ...... -# - 9.35% git-repack ./git [.] insert_obj_hash - 9.12% git ./git [.] insert_obj_hash - 7.31% git /lib64/libc-2.5.so [.] memcpy - 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc - 6.24% git-repack /lib64/libc-2.5.so [.] memcpy - 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork - 5.47% git /lib64/libc-2.5.so [.] _int_malloc - 2.99% git /lib64/libc-2.5.so [.] memset - -Furthermore, call-graph sampling can be done too, of page -allocations - to see precisely what kind of page allocations there -are: - - titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc - Counting objects: 1148, done. - Delta compression using up to 2 threads. - Compressing objects: 100% (450/450), done. - Writing objects: 100% (1148/1148), done. - Total 1148 (delta 690), reused 1148 (delta 690) - [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] - - titan:~/git> perf report -g - # Samples: 10686 - # - # Overhead Command Shared Object - # ........ ............... .......................... - # - 23.25% git-repack /lib64/libc-2.5.so - | - |--50.00%-- _int_free - | - |--37.50%-- __GI___fork - | make_child - | - |--12.50%-- ptmalloc_unlock_all2 - | make_child - | - --6.25%-- __GI_strcpy - 21.61% git /lib64/libc-2.5.so - | - |--30.00%-- __GI_read - | | - | --83.33%-- git_config_from_file - | git_config - | | - [...] - -Or you can observe the whole system's page allocations for 10 -seconds: - -titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e -kmem:mm_page_alloc -e kmem:mm_pagevec_free -e -kmem:mm_page_free_direct sleep 10 - - Performance counter stats for 'sleep 10': - - 171585 kmem:mm_page_pcpu_drain - 322114 kmem:mm_page_alloc - 73623 kmem:mm_pagevec_free - 254115 kmem:mm_page_free_direct - - 10.000591410 seconds time elapsed - -Or observe how fluctuating the page allocations are, via statistical -analysis done over ten 1-second intervals: - - titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct sleep 1 - - Performance counter stats for 'sleep 1' (10 runs): - - 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) - 34394 kmem:mm_page_alloc ( +- 4.617% ) - 7509 kmem:mm_pagevec_free ( +- 4.820% ) - 25653 kmem:mm_page_free_direct ( +- 3.672% ) - - 1.058135029 seconds time elapsed ( +- 3.089% ) - -Or you can annotate the recorded 'git gc' run on a per symbol basis -and check which instructions/source-code generated page allocations: - - titan:~/git> perf annotate __GI___fork - ------------------------------------------------ - Percent | Source code & Disassembly of libc-2.5.so - ------------------------------------------------ - : - : - : Disassembly of section .plt: - : Disassembly of section .text: - : - : 00000031a2e95560 <__fork>: - [...] - 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax - 0.00 : 31a2e95607: 0f 05 syscall - 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax - 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> - 0.00 : 31a2e95615: 85 c0 test %eax,%eax - -( this shows that 83.42% of __GI___fork's page allocations come from - the 0x38 system call it performs. ) - -etc. etc. - a lot more is possible. I could list a dozen of -other different usecases straight away - neither of which is -possible via /proc/vmstat. - -/proc/vmstat is not in the same league really, in terms of -expressive power of system analysis and performance -analysis. - -All that the above results needed were those new tracepoints -in include/tracing/events/kmem.h. - - Ingo - - -- cgit v1.2.3-70-g09d2 From b395cd8a74b4a8d943dd4b5585e676f62f7350b3 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 18 Aug 2009 12:41:25 -0400 Subject: perf tools: Make 'make html' work pushd tools/perf/Documentation make html popd is failing for me... ASCIIDOC perf-annotate.html ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11.css ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11-manpage.css ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11-quirks.css make: *** [perf-annotate.html] Error 1 Apparently asciidoc "unsafe" is the default mode of operation in practice. https://bugzilla.redhat.com/show_bug.cgi?id=506953 Works tidily now. Signed-off-by: Kyle McMartin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20090818164125.GM25206@bombadil.infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/Documentation') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 5457192e1b4..bdd3b7ecad0 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -35,7 +35,7 @@ man7dir=$(mandir)/man7 # DESTDIR= ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = +ASCIIDOC_EXTRA = --unsafe MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install -- cgit v1.2.3-70-g09d2