diff options
Diffstat (limited to 'tools/perf')
98 files changed, 2185 insertions, 990 deletions
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index b2c63309a65..6f5a498608b 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -24,12 +24,47 @@ OPTIONS --input=:: Input file name. (default: perf.data) +-d:: +--dsos=<dso[,dso...]>:: + Only consider symbols in these dsos. +-s:: +--symbol=<symbol>:: + Symbol to annotate. + +-f:: +--force:: + Don't complain, do it. + +-v:: +--verbose:: + Be more verbose. (Show symbol address, etc) + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +-k:: +--vmlinux=<file>:: + vmlinux pathname. + +-m:: +--modules:: + Load module symbols. WARNING: use only with -k and LIVE kernel. + +-l:: +--print-line:: + Print matching source lines (may be slow). + +-P:: +--full-paths:: + Don't shorten the displayed pathnames. + --stdio:: Use the stdio interface. --tui:: Use the TUI interface Use of --tui requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. This interfaces starts by centering on the line with more - samples, TAB/UNTAB cycles thru the lines with more samples. + samples, TAB/UNTAB cycles through the lines with more samples. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt index 01b642c0bf8..5eaac6f26d5 100644 --- a/tools/perf/Documentation/perf-buildid-list.txt +++ b/tools/perf/Documentation/perf-buildid-list.txt @@ -18,6 +18,9 @@ perf report. OPTIONS ------- +-H:: +--with-hits:: + Show only DSOs with hits. -i:: --input=:: Input file name. (default: perf.data) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 20d97d84ea1..74d7481ed7a 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data. OPTIONS ------- +-M:: +--displacement:: + Show position displacement relative to baseline. + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + +-m:: +--modules:: + Load module symbols. WARNING: use only with -k and LIVE kernel + -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands @@ -42,7 +54,7 @@ OPTIONS --field-separator=:: Use a special separator character and don't pad with spaces, replacing - all occurances of this separator in symbol names (and other output) + all occurrences of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. -v:: @@ -50,6 +62,13 @@ OPTIONS Be verbose, for instance, show the raw counts in addition to the diff. +-f:: +--force:: + Don't complain, do it. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index d004e19fe6d..dd84cb2f0a8 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -22,7 +22,7 @@ There are a couple of variants of perf kvm: a performance counter profile of guest os in realtime of an arbitrary workload. - 'perf kvm record <command>' to record the performance couinter profile + 'perf kvm record <command>' to record the performance counter profile of an arbitrary workload and save it into a perf data file. If both --host and --guest are input, the perf data file name is perf.data.kvm. If there is no --host but --guest, the file name is perf.data.guest. @@ -40,6 +40,12 @@ There are a couple of variants of perf kvm: OPTIONS ------- +-i:: +--input=:: + Input file name. +-o:: +--output:: + Output file name. --host=:: Collect host side performance profile. --guest=:: diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index b317102138c..921de259ea1 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -24,6 +24,21 @@ and statistics with this 'perf lock' command. 'perf lock report' reports statistical data. +OPTIONS +------- + +-i:: +--input=<file>:: + Input file name. + +-v:: +--verbose:: + Be more verbose (show symbol address, etc). + +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 62de1b7f4e7..86b797a35aa 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -115,9 +115,9 @@ Each probe argument follows below syntax. LINE SYNTAX ----------- -Line range is descripted by following syntax. +Line range is described by following syntax. - "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" + "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" FUNC specifies the function name of showing lines. 'RLN' is the start line number from function entry line, and 'RLN2' is the end line number. As same as diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a91f9f9e6e5..52462ae2645 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -39,15 +39,24 @@ OPTIONS be passed as follows: '\mem:addr[:[r][w][x]]'. If you want to profile read-write accesses in 0x1000, just set 'mem:0x1000:rw'. + +--filter=<filter>:: + Event filter. + -a:: - System-wide collection. +--all-cpus:: + System-wide collection from all CPUs. -l:: Scale counter values. -p:: --pid=:: - Record events on existing pid. + Record events on existing process ID. + +-t:: +--tid=:: + Record events on existing thread ID. -r:: --realtime=:: @@ -99,6 +108,11 @@ OPTIONS --data:: Sample addresses. +-T:: +--timestamp:: + Sample timestamps. Use it with 'perf report -D' to see the timestamps, + for instance. + -n:: --no-samples:: Don't sample. @@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun -C:: --cpu:: -Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a -comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. In per-thread mode with inheritance mode on (default), samples are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 12052c9ed0b..8ba03d6e539 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -20,6 +20,11 @@ OPTIONS -i:: --input=:: Input file name. (default: perf.data) + +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands @@ -27,6 +32,10 @@ OPTIONS -n:: --show-nr-samples:: Show the number of samples for each symbol + +--showcpuutilization:: + Show sample percentage for different cpu modes. + -T:: --threads:: Show per-thread event counters @@ -39,12 +48,24 @@ OPTIONS Only consider these symbols. CSV that understands file://filename entries. +-U:: +--hide-unresolved:: + Only display entries resolved to a symbol. + -s:: --sort=:: Sort by key(s): pid, comm, dso, symbol, parent. +-p:: +--parent=<regex>:: + regex filter to identify parent, see: '--sort parent' + +-x:: +--exclude-other:: + Only display entries with parent-match. + -w:: ---field-width=:: +--column-widths=<width[,width...]>:: Force each column width to the provided list, for large terminal readability. @@ -52,19 +73,26 @@ OPTIONS --field-separator=:: Use a special separator character and don't pad with spaces, replacing - all occurances of this separator in symbol names (and other output) + all occurrences of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. +-D:: +--dump-raw-trace:: + Dump raw trace in ASCII. + -g [type,min]:: --call-graph:: - Display callchains using type and min percent threshold. + Display call chains using type and min percent threshold. type can be either: - - flat: single column, linear exposure of callchains. + - flat: single column, linear exposure of call chains. - graph: use a graph tree, displaying absolute overhead rates. - fractal: like graph, but displays relative rates. Each branch of the tree is considered as a new profiled object. + Default: fractal,0.5. +--pretty=<key>:: + Pretty printing style. key: normal, raw + --stdio:: Use the stdio interface. --tui:: Use the TUI interface, that is integrated with annotate and allows @@ -72,6 +100,25 @@ OPTIONS requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. +-k:: +--vmlinux=<file>:: + vmlinux pathname + +--kallsyms=<file>:: + kallsyms pathname + +-m:: +--modules:: + Load module symbols. WARNING: This should only be used with -k and + a LIVE kernel. + +-f:: +--force:: + Don't complain, do it. + +--symfs=<directory>:: + Look for files with symbols relative to this directory. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 8417644a616..46822d5fde1 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies) SYNOPSIS -------- [verse] -'perf sched' {record|latency|replay|trace} +'perf sched' {record|latency|map|replay|trace} DESCRIPTION ----------- -There are four variants of perf sched: +There are five variants of perf sched: 'perf sched record <command>' to record the scheduling events of an arbitrary workload. @@ -30,8 +30,22 @@ There are four variants of perf sched: of the workload as it occurred when it was recorded - and can repeat it a number of times, measuring its performance.) + 'perf sched map' to print a textual context-switching outline of + workload captured via perf sched record. Columns stand for + individual CPUs, and the two-letter shortcuts stand for tasks that + are running on a CPU. A '*' denotes the CPU that had the event, and + a dot signals an idle CPU. + OPTIONS ------- +-i:: +--input=<file>:: + Input file name. (default: perf.data) + +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + -D:: --dump-raw-trace=:: Display verbose dump of the sched data. diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index ee6525ee6d6..5bb41e55a3a 100644 --- a/tools/perf/Documentation/perf-trace-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -1,19 +1,19 @@ -perf-trace-perl(1) +perf-script-perl(1) ================== NAME ---- -perf-trace-perl - Process trace data with a Perl script +perf-script-perl - Process trace data with a Perl script SYNOPSIS -------- [verse] -'perf trace' [-s [Perl]:script[.pl] ] +'perf script' [-s [Perl]:script[.pl] ] DESCRIPTION ----------- -This perf trace option is used to process perf trace data using perf's +This perf script option is used to process perf script data using perf's built-in Perl interpreter. It reads and processes the input file and displays the results of the trace analysis implemented in the given Perl script, if any. @@ -21,7 +21,7 @@ Perl script, if any. STARTER SCRIPTS --------------- -You can avoid reading the rest of this document by running 'perf trace +You can avoid reading the rest of this document by running 'perf script -g perl' in the same directory as an existing perf.data trace file. That will generate a starter script containing a handler for each of the event types in the trace file; it simply prints every available @@ -30,13 +30,13 @@ field for each event in the trace file. You can also look at the existing scripts in ~/libexec/perf-core/scripts/perl for typical examples showing how to do basic things like aggregate event data, print results, etc. Also, -the check-perf-trace.pl script, while not interesting for its results, +the check-perf-script.pl script, while not interesting for its results, attempts to exercise all of the main scripting features. EVENT HANDLERS -------------- -When perf trace is invoked using a trace script, a user-defined +When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is ignored (or passed to a 'trace_handled' function, see below) and the @@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest. SCRIPT LAYOUT ------------- -Every perf trace Perl script should start by setting up a Perl module +Every perf script Perl script should start by setting up a Perl module search path and 'use'ing a few support modules (see module descriptions below): ---- - use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; - use lib "./Perf-Trace-Util/lib"; + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; + use lib "./perf-script-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Context; use Perf::Trace::Util; @@ -162,7 +162,7 @@ sub trace_unhandled ---- The remaining sections provide descriptions of each of the available -built-in perf trace Perl modules and their associated functions. +built-in perf script Perl modules and their associated functions. AVAILABLE MODULES AND FUNCTIONS ------------------------------- @@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS The following sections describe the functions and variables available via the various Perf::Trace::* Perl modules. To use the functions and variables from the given module, add the corresponding 'use -Perf::Trace::XXX' line to your perf trace script. +Perf::Trace::XXX' line to your perf script script. Perf::Trace::Core Module ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -204,7 +204,7 @@ argument. Perf::Trace::Util Module ~~~~~~~~~~~~~~~~~~~~~~~~ -Various utility functions for use with perf trace: +Various utility functions for use with perf script: nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair nsecs_secs($nsecs) - returns whole secs portion given nsecs @@ -214,4 +214,4 @@ Various utility functions for use with perf trace: SEE ALSO -------- -linkperf:perf-trace[1] +linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt index 693be804dd3..36b38277422 100644 --- a/tools/perf/Documentation/perf-trace-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -1,19 +1,19 @@ -perf-trace-python(1) +perf-script-python(1) ==================== NAME ---- -perf-trace-python - Process trace data with a Python script +perf-script-python - Process trace data with a Python script SYNOPSIS -------- [verse] -'perf trace' [-s [Python]:script[.py] ] +'perf script' [-s [Python]:script[.py] ] DESCRIPTION ----------- -This perf trace option is used to process perf trace data using perf's +This perf script option is used to process perf script data using perf's built-in Python interpreter. It reads and processes the input file and displays the results of the trace analysis implemented in the given Python script, if any. @@ -23,15 +23,15 @@ A QUICK EXAMPLE This section shows the process, start to finish, of creating a working Python script that aggregates and extracts useful information from a -raw perf trace stream. You can avoid reading the rest of this +raw perf script stream. You can avoid reading the rest of this document if an example is enough for you; the rest of the document provides more details on each step and lists the library functions available to script writers. This example actually details the steps that were used to create the -'syscall-counts' script you see when you list the available perf trace -scripts via 'perf trace -l'. As such, this script also shows how to -integrate your script into the list of general-purpose 'perf trace' +'syscall-counts' script you see when you list the available perf script +scripts via 'perf script -l'. As such, this script also shows how to +integrate your script into the list of general-purpose 'perf script' scripts listed by that command. The syscall-counts script is a simple script, but demonstrates all the @@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory called perf.data. Once we have a perf.data file containing our data, we can use the -g -'perf trace' option to generate a Python script that will contain a +'perf script' option to generate a Python script that will contain a callback handler for each event type found in the perf.data trace stream (for more details, see the STARTER SCRIPTS section). ---- -# perf trace -g python -generated Python script: perf-trace.py +# perf script -g python +generated Python script: perf-script.py The output file created also in the current directory is named -perf-trace.py. Here's the file in its entirety: +perf-script.py. Here's the file in its entirety: -# perf trace event handlers, generated by perf trace -g python +# perf script event handlers, generated by perf script -g python # Licensed under the terms of the GNU GPL License version 2 # The common_* event handler fields are the most useful fields common to # all events. They don't necessarily correspond to the 'common_*' fields # in the format files. Those fields not available as handler params can # be retrieved using Python functions of the form common_*(context). -# See the perf-trace-python Documentation for the list of available functions. +# See the perf-script-python Documentation for the list of available functions. import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + '/scripts/python/perf-script-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm): ---- At the top is a comment block followed by some import statements and a -path append which every perf trace script should include. +path append which every perf script script should include. Following that are a couple generated functions, trace_begin() and trace_end(), which are called at the beginning and the end of the @@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the script and run it to see the default output: ---- -# mv perf-trace.py syscall-counts.py -# perf trace -s syscall-counts.py +# mv perf-script.py syscall-counts.py +# perf script -s syscall-counts.py raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args= raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args= @@ -216,7 +216,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + '/scripts/python/perf-script-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -279,7 +279,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + '/scripts/python/perf-script-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -315,7 +315,7 @@ def print_syscall_totals(): The script can be run just as before: - # perf trace -s syscall-counts.py + # perf script -s syscall-counts.py So those are the essential steps in writing and running a script. The process can be generalized to any tracepoint or set of tracepoints @@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by 'perf list' and/or look in /sys/kernel/debug/tracing events for detailed event and field info, record the corresponding trace data using 'perf record', passing it the list of interesting events, -generate a skeleton script using 'perf trace -g python' and modify the +generate a skeleton script using 'perf script -g python' and modify the code to aggregate and display it for your particular needs. After you've done that you may end up with a general-purpose script that you want to keep around and have available for future use. By writing a couple of very simple shell scripts and putting them in the right place, you can have your script listed alongside the other -scripts listed by the 'perf trace -l' command e.g.: +scripts listed by the 'perf script -l' command e.g.: ---- -root@tropicana:~# perf trace -l +root@tropicana:~# perf script -l List of available trace scripts: workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency @@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter The 'report' script is also a shell script with the same base name as your script, but with -report appended. It should also be located in the perf/scripts/python/bin directory. In that script, you write the -'perf trace -s' command-line needed for running your script: +'perf script -s' command-line needed for running your script: ---- # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report #!/bin/bash # description: system-wide syscall counts -perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py +perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py ---- Note that the location of the Python script given in the shell script @@ -390,17 +390,17 @@ total 32 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin --rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py -drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util +-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py ---- Once you've done that (don't forget to do a new 'make install', -otherwise your script won't show up at run-time), 'perf trace -l' +otherwise your script won't show up at run-time), 'perf script -l' should show a new entry for your script: ---- -root@tropicana:~# perf trace -l +root@tropicana:~# perf script -l List of available trace scripts: workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency @@ -409,19 +409,19 @@ List of available trace scripts: syscall-counts system-wide syscall counts ---- -You can now perform the record step via 'perf trace record': +You can now perform the record step via 'perf script record': - # perf trace record syscall-counts + # perf script record syscall-counts -and display the output using 'perf trace report': +and display the output using 'perf script report': - # perf trace report syscall-counts + # perf script report syscall-counts STARTER SCRIPTS --------------- You can quickly get started writing a script for a particular set of -trace data by generating a skeleton script using 'perf trace -g +trace data by generating a skeleton script using 'perf script -g python' in the same directory as an existing perf.data trace file. That will generate a starter script containing a handler for each of the event types in the trace file; it simply prints every available @@ -430,13 +430,13 @@ field for each event in the trace file. You can also look at the existing scripts in ~/libexec/perf-core/scripts/python for typical examples showing how to do basic things like aggregate event data, print results, etc. Also, -the check-perf-trace.py script, while not interesting for its results, +the check-perf-script.py script, while not interesting for its results, attempts to exercise all of the main scripting features. EVENT HANDLERS -------------- -When perf trace is invoked using a trace script, a user-defined +When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is ignored (or passed to a 'trace_handled' function, see below) and the @@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest. SCRIPT LAYOUT ------------- -Every perf trace Python script should start by setting up a Python +Every perf script Python script should start by setting up a Python module search path and 'import'ing a few support modules (see module descriptions below): @@ -519,7 +519,7 @@ descriptions below): import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + '/scripts/python/perf-script-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs, ---- The remaining sections provide descriptions of each of the available -built-in perf trace Python modules and their associated functions. +built-in perf script Python modules and their associated functions. AVAILABLE MODULES AND FUNCTIONS ------------------------------- The following sections describe the functions and variables available -via the various perf trace Python modules. To use the functions and +via the various perf script Python modules. To use the functions and variables from the given module, add the corresponding 'from XXXX -import' line to your perf trace script. +import' line to your perf script script. Core.py Module ~~~~~~~~~~~~~~ @@ -610,7 +610,7 @@ argument. Util.py Module ~~~~~~~~~~~~~~ -Various utility functions for use with perf trace: +Various utility functions for use with perf script: nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair nsecs_secs(nsecs) - returns whole secs portion given nsecs @@ -620,4 +620,4 @@ Various utility functions for use with perf trace: SEE ALSO -------- -linkperf:perf-trace[1] +linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt index 26aff6bf9e5..29ad94293cd 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -1,71 +1,71 @@ -perf-trace(1) +perf-script(1) ============= NAME ---- -perf-trace - Read perf.data (created by perf record) and display trace output +perf-script - Read perf.data (created by perf record) and display trace output SYNOPSIS -------- [verse] -'perf trace' [<options>] -'perf trace' [<options>] record <script> [<record-options>] <command> -'perf trace' [<options>] report <script> [script-args] -'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command> -'perf trace' [<options>] <top-script> [script-args] +'perf script' [<options>] +'perf script' [<options>] record <script> [<record-options>] <command> +'perf script' [<options>] report <script> [script-args] +'perf script' [<options>] <script> <required-script-args> [<record-options>] <command> +'perf script' [<options>] <top-script> [script-args] DESCRIPTION ----------- This command reads the input file and displays the trace recorded. -There are several variants of perf trace: +There are several variants of perf script: - 'perf trace' to see a detailed trace of the workload that was + 'perf script' to see a detailed trace of the workload that was recorded. You can also run a set of pre-canned scripts that aggregate and summarize the raw trace data in various ways (the list of scripts is - available via 'perf trace -l'). The following variants allow you to + available via 'perf script -l'). The following variants allow you to record and run those scripts: - 'perf trace record <script> <command>' to record the events required - for 'perf trace report'. <script> is the name displayed in the - output of 'perf trace --list' i.e. the actual script name minus any + 'perf script record <script> <command>' to record the events required + for 'perf script report'. <script> is the name displayed in the + output of 'perf script --list' i.e. the actual script name minus any language extension. If <command> is not specified, the events are recorded using the -a (system-wide) 'perf record' option. - 'perf trace report <script> [args]' to run and display the results + 'perf script report <script> [args]' to run and display the results of <script>. <script> is the name displayed in the output of 'perf trace --list' i.e. the actual script name minus any language - extension. The perf.data output from a previous run of 'perf trace + extension. The perf.data output from a previous run of 'perf script record <script>' is used and should be present for this command to succeed. [args] refers to the (mainly optional) args expected by the script. - 'perf trace <script> <required-script-args> <command>' to both + 'perf script <script> <required-script-args> <command>' to both record the events required for <script> and to run the <script> using 'live-mode' i.e. without writing anything to disk. <script> - is the name displayed in the output of 'perf trace --list' i.e. the + is the name displayed in the output of 'perf script --list' i.e. the actual script name minus any language extension. If <command> is not specified, the events are recorded using the -a (system-wide) 'perf record' option. If <script> has any required args, they should be specified before <command>. This mode doesn't allow for optional script args to be specified; if optional script args are - desired, they can be specified using separate 'perf trace record' - and 'perf trace report' commands, with the stdout of the record step + desired, they can be specified using separate 'perf script record' + and 'perf script report' commands, with the stdout of the record step piped to the stdin of the report script, using the '-o -' and '-i -' options of the corresponding commands. - 'perf trace <top-script>' to both record the events required for + 'perf script <top-script>' to both record the events required for <top-script> and to run the <top-script> using 'live-mode' i.e. without writing anything to disk. <top-script> is the name - displayed in the output of 'perf trace --list' i.e. the actual + displayed in the output of 'perf script --list' i.e. the actual script name minus any language extension; a <top-script> is defined as any script name ending with the string 'top'. - [<record-options>] can be passed to the record steps of 'perf trace + [<record-options>] can be passed to the record steps of 'perf script record' and 'live-mode' variants; this isn't possible however for - <top-script> 'live-mode' or 'perf trace report' variants. + <top-script> 'live-mode' or 'perf script report' variants. See the 'SEE ALSO' section for links to language-specific information on how to write and run your own trace scripts. @@ -76,7 +76,7 @@ OPTIONS Any command you can specify in a shell. -D:: ---dump-raw-trace=:: +--dump-raw-script=:: Display verbose dump of the trace data. -L:: @@ -95,7 +95,7 @@ OPTIONS -g:: --gen-script=:: - Generate perf-trace.[ext] starter script for given language, + Generate perf-script.[ext] starter script for given language, using current perf.data. -a:: @@ -104,8 +104,15 @@ OPTIONS normally don't - this option allows the latter to be run in system-wide mode. +-i:: +--input=:: + Input file name. + +-d:: +--debug-mode:: + Do various checks like samples ordering and lost events. SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-trace-perl[1], -linkperf:perf-trace-python[1] +linkperf:perf-record[1], linkperf:perf-script-perl[1], +linkperf:perf-script-python[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4b3a2d46b43..b6da7affbbe 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> -'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] +'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command> +'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] DESCRIPTION ----------- @@ -35,24 +35,54 @@ OPTIONS child tasks do not inherit counters -p:: --pid=<pid>:: - stat events on existing pid + stat events on existing process id + +-t:: +--tid=<tid>:: + stat events on existing thread id + -a:: - system-wide collection +--all-cpus:: + system-wide collection from all CPUs -c:: - scale counter values +--scale:: + scale/normalize counter values + +-r:: +--repeat=<n>:: + repeat command and print average + stddev (max: 100) -B:: +--big-num:: print large numbers with thousands' separators according to locale -C:: --cpu=:: -Count only on the list of cpus provided. Multiple CPUs can be provided as a -comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Count only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. In per-thread mode, this option is ignored. The -a option is still necessary to activate system-wide monitoring. Default is to count on all CPUs. +-A:: +--no-aggr:: +Do not aggregate counts across all monitored CPUs in system-wide mode (-a). +This option is only valid in system-wide mode. + +-n:: +--null:: + null run - don't start any counters + +-v:: +--verbose:: + be more verbose (show counter open errors, etc) + +-x SEP:: +--field-separator SEP:: +print counts using a CSV-style output to make it easy to import directly into +spreadsheets. Columns are separated by the string specified in SEP. + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 1c4b5f5b7f7..2c3b462f64b 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This command does assorted sanity tests, initially thru linked routines but +This command does assorted sanity tests, initially through linked routines but also will look for a directory with more tests in the form of scripts. OPTIONS diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 4b1788355ec..d7b79e2ba2a 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -38,6 +38,8 @@ OPTIONS --process:: Select the processes to display, by name or PID +--symfs=<directory>:: + Look for files with symbols relative to this directory. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 1f9687663f2..f6eb1cdafb7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -12,7 +12,7 @@ SYNOPSIS DESCRIPTION ----------- -This command generates and displays a performance counter profile in realtime. +This command generates and displays a performance counter profile in real time. OPTIONS @@ -27,8 +27,8 @@ OPTIONS -C <cpu-list>:: --cpu=<cpu>:: -Monitor only on the list of cpus provided. Multiple CPUs can be provided as a -comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default is to monitor all CPUS. -d <seconds>:: @@ -50,6 +50,10 @@ Default is to monitor all CPUS. --count-filter=<count>:: Only display functions with more events than this. +-g:: +--group:: + Put the counters into a counter group. + -F <freq>:: --freq=<freq>:: Profile at this frequency. @@ -68,7 +72,11 @@ Default is to monitor all CPUS. -p <pid>:: --pid=<pid>:: - Profile events on existing pid. + Profile events on existing Process ID. + +-t <tid>:: +--tid=<tid>:: + Profile events on existing thread ID. -r <priority>:: --realtime=<priority>:: @@ -78,6 +86,18 @@ Default is to monitor all CPUS. --sym-annotate=<symbol>:: Annotate this symbol. +-K:: +--hide_kernel_symbols:: + Hide kernel symbols. + +-U:: +--hide_user_symbols:: + Hide user symbols. + +-D:: +--dump-symtab:: + Dump the symbol table used for profiling. + -v:: --verbose:: Be more verbose (show counter open errors, etc). diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8c7fc0c8f0b..c12659d8cb2 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -7,6 +7,7 @@ include/linux/stringify.h lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h +arch/*/lib/memcpy*.S include/linux/poison.h include/linux/magic.h include/linux/hw_breakpoint.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d1db0f676a4..ac6692cf550 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -185,7 +185,10 @@ ifeq ($(ARCH),i386) ARCH := x86 endif ifeq ($(ARCH),x86_64) + RAW_ARCH := x86_64 ARCH := x86 + ARCH_CFLAGS := -DARCH_X86_64 + ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S endif # CFLAGS and LDFLAGS are for the users to override from the command line. @@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h LIB_H += util/include/linux/string.h LIB_H += util/include/linux/types.h +LIB_H += util/include/linux/linkage.h LIB_H += util/include/asm/asm-offsets.h LIB_H += util/include/asm/bug.h LIB_H += util/include/asm/byteorder.h @@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h LIB_H += util/include/asm/uaccess.h LIB_H += util/include/dwarf-regs.h +LIB_H += util/include/asm/dwarf2.h +LIB_H += util/include/asm/cpufeature.h LIB_H += perf.h LIB_H += util/cache.h LIB_H += util/callchain.h @@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h LIB_H += util/probe-event.h LIB_H += util/pstack.h LIB_H += util/cpumap.h +LIB_H += $(ARCH_INCLUDE) LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o +ifeq ($(RAW_ARCH),x86_64) +BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o +endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o @@ -485,7 +495,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o BUILTIN_OBJS += $(OUTPUT)builtin-stat.o BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-trace.o +BUILTIN_OBJS += $(OUTPUT)builtin-script.o BUILTIN_OBJS += $(OUTPUT)builtin-probe.o BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o @@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE) -include config.mak ifndef NO_DWARF -FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) +FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 @@ -554,7 +564,7 @@ ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); else - BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT + BASIC_CFLAGS += -DDWARF_SUPPORT EXTLIBS += -lelf -ldw LIB_OBJS += $(OUTPUT)util/probe-finder.o endif # PERF_HAVE_DWARF_REGS @@ -891,13 +901,14 @@ prefix_SQ = $(subst ','\'',$(prefix)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) -LIBS = $(PERFLIBS) $(EXTLIBS) +LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ $(COMPAT_CFLAGS) LIB_OBJS += $(COMPAT_OBJS) ALL_CFLAGS += $(BASIC_CFLAGS) +ALL_CFLAGS += $(ARCH_CFLAGS) ALL_LDFLAGS += $(BASIC_LDFLAGS) export TAR INSTALL DESTDIR SHELL_PATH diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h new file mode 100644 index 00000000000..a72e36cb539 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMCPY_FN(fn, name, desc) \ + extern void *fn(void *, const void *, size_t); + +#include "mem-memcpy-x86-64-asm-def.h" + +#undef MEMCPY_FN + +#endif + diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h new file mode 100644 index 00000000000..d588b87696f --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -0,0 +1,4 @@ + +MEMCPY_FN(__memcpy, + "x86-64-unrolled", + "unrolled memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S new file mode 100644 index 00000000000..a57b66e853c --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -0,0 +1,2 @@ + +#include "../../../arch/x86/lib/memcpy_64.S" diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 38dae746514..db82021f4b9 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -12,6 +12,7 @@ #include "../util/parse-options.h" #include "../util/header.h" #include "bench.h" +#include "mem-memcpy-arch.h" #include <stdio.h> #include <stdlib.h> @@ -23,8 +24,10 @@ static const char *length_str = "1MB"; static const char *routine = "default"; -static bool use_clock = false; +static bool use_clock; static int clock_fd; +static bool only_prefault; +static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", @@ -34,19 +37,33 @@ static const struct option options[] = { "Specify routine to copy"), OPT_BOOLEAN('c', "clock", &use_clock, "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memcpy()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memcpy()"), OPT_END() }; +typedef void *(*memcpy_t)(void *, const void *, size_t); + struct routine { const char *name; const char *desc; - void * (*fn)(void *dst, const void *src, size_t len); + memcpy_t fn; }; struct routine routines[] = { { "default", "Default memcpy() provided by glibc", memcpy }, +#ifdef ARCH_X86_64 + +#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memcpy-x86-64-asm-def.h" +#undef MEMCPY_FN + +#endif + { NULL, NULL, NULL } @@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts) (double)ts->tv_usec / (double)1000000; } +static void alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!src) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *src = NULL, *dst = NULL; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + clock_start = get_clock(); + fn(dst, src, len); + clock_end = get_clock(); + + free(src); + free(dst); + return clock_end - clock_start; +} + +static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *src = NULL, *dst = NULL; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used) { int i; - void *dst, *src; - size_t length; - double bps = 0.0; - struct timeval tv_start, tv_end, tv_diff; - u64 clock_start, clock_end, clock_diff; + size_t len; + double result_bps[2]; + u64 result_clock[2]; - clock_start = clock_end = clock_diff = 0ULL; argc = parse_options(argc, argv, options, bench_mem_memcpy_usage, 0); - tv_diff.tv_sec = 0; - tv_diff.tv_usec = 0; - length = (size_t)perf_atoll((char *)length_str); + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); - if ((s64)length <= 0) { + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { fprintf(stderr, "Invalid length:%s\n", length_str); return 1; } + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + for (i = 0; routines[i].name; i++) { if (!strcmp(routines[i].name, routine)) break; @@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv, return 1; } - dst = zalloc(length); - if (!dst) - die("memory allocation failed - maybe length is too large?\n"); - - src = zalloc(length); - if (!src) - die("memory allocation failed - maybe length is too large?\n"); - - if (bench_format == BENCH_FORMAT_DEFAULT) { - printf("# Copying %s Bytes from %p to %p ...\n\n", - length_str, src, dst); - } - - if (use_clock) { - init_clock(); - clock_start = get_clock(); - } else { - BUG_ON(gettimeofday(&tv_start, NULL)); - } - - routines[i].fn(dst, src, length); + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); - if (use_clock) { - clock_end = get_clock(); - clock_diff = clock_end - clock_start; + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memcpy_clock(routines[i].fn, len, false); + result_clock[1] = + do_memcpy_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memcpy_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memcpy_gettimeofday(routines[i].fn, + len, true); + } } else { - BUG_ON(gettimeofday(&tv_end, NULL)); - timersub(&tv_end, &tv_start, &tv_diff); - bps = (double)((double)length / timeval2double(&tv_diff)); + if (use_clock) { + result_clock[pf] = + do_memcpy_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memcpy_gettimeofday(routines[i].fn, + len, only_prefault); + } } switch (bench_format) { case BENCH_FORMAT_DEFAULT: - if (use_clock) { - printf(" %14lf Clock/Byte\n", - (double)clock_diff / (double)length); - } else { - if (bps < K) - printf(" %14lf B/Sec\n", bps); - else if (bps < K * K) - printf(" %14lfd KB/Sec\n", bps / 1024); - else if (bps < K * K * K) - printf(" %14lf MB/Sec\n", bps / 1024 / 1024); - else { - printf(" %14lf GB/Sec\n", - bps / 1024 / 1024 / 1024); + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); } break; case BENCH_FORMAT_SIMPLE: - if (use_clock) { - printf("%14lf\n", - (double)clock_diff / (double)length); - } else - printf("%lf\n", bps); + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } break; default: /* reaching this means there's some disaster: */ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6d5604d8df9..c056cdc0691 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) return hist_entry__inc_addr_samples(he, al->addr); } -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event, struct sample_data *sample, + struct perf_session *session) { struct addr_location al; - struct sample_data data; - if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { + if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = { .mmap = event__process_mmap, .comm = event__process_comm, .fork = event__process_task, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static int __cmd_annotate(void) @@ -382,7 +384,7 @@ static int __cmd_annotate(void) int ret; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, force, false); + session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index c49837de7d3..5af32ae9031 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -38,7 +38,8 @@ static int __cmd_buildid_list(void) { struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, force, false); + session = perf_session__new(input_name, O_RDONLY, force, false, + &build_id__mark_dso_hit_ops); if (session == NULL) return -1; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index fca1d440291..3153e492dbc 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(event_t *event, struct perf_session *session) +static int diff__process_sample_event(event_t *event, + struct sample_data *sample, + struct perf_session *session) { struct addr_location al; - struct sample_data data = { .period = 1, }; - if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { + if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi if (al.filtered || al.sym == NULL) return 0; - if (hists__add_entry(&session->hists, &al, data.period)) { + if (hists__add_entry(&session->hists, &al, sample->period)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } - session->hists.stats.total_period += data.period; + session->hists.stats.total_period += sample->period; return 0; } @@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = { .exit = event__process_task, .fork = event__process_task, .lost = event__process_lost, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, @@ -141,8 +144,8 @@ static int __cmd_diff(void) int ret, i; struct perf_session *session[2]; - session[0] = perf_session__new(input_old, O_RDONLY, force, false); - session[1] = perf_session__new(input_new, O_RDONLY, force, false); + session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops); + session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops); if (session[0] == NULL || session[1] == NULL) return -ENOMEM; @@ -173,7 +176,7 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('m', "displacement", &show_displacement, + OPT_BOOLEAN('M', "displacement", &show_displacement, "Show position displacement relative to baseline"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), @@ -191,6 +194,8 @@ static const struct option options[] = { OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), OPT_END() }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8e3e47b064c..0c78ffa7bf6 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -16,8 +16,8 @@ static char const *input_name = "-"; static bool inject_build_ids; -static int event__repipe(event_t *event __used, - struct perf_session *session __used) +static int event__repipe_synth(event_t *event, + struct perf_session *session __used) { uint32_t size; void *buf = event; @@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used, return 0; } -static int event__repipe_mmap(event_t *self, struct perf_session *session) +static int event__repipe(event_t *event, struct sample_data *sample __used, + struct perf_session *session) +{ + return event__repipe_synth(event, session); +} + +static int event__repipe_mmap(event_t *self, struct sample_data *sample, + struct perf_session *session) { int err; - err = event__process_mmap(self, session); - event__repipe(self, session); + err = event__process_mmap(self, sample, session); + event__repipe(self, sample, session); return err; } -static int event__repipe_task(event_t *self, struct perf_session *session) +static int event__repipe_task(event_t *self, struct sample_data *sample, + struct perf_session *session) { int err; - err = event__process_task(self, session); - event__repipe(self, session); + err = event__process_task(self, sample, session); + event__repipe(self, sample, session); return err; } @@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self, { int err; - event__repipe(self, session); + event__repipe_synth(self, session); err = event__process_tracing_data(self, session); return err; @@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) return 0; } -static int event__inject_buildid(event_t *event, struct perf_session *session) +static int event__inject_buildid(event_t *event, struct sample_data *sample, + struct perf_session *session) { struct addr_location al; struct thread *thread; @@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session) } repipe: - event__repipe(event, session); + event__repipe(event, sample, session); return 0; } @@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = { .read = event__repipe, .throttle = event__repipe, .unthrottle = event__repipe, - .attr = event__repipe, - .event_type = event__repipe, - .tracing_data = event__repipe, - .build_id = event__repipe, + .attr = event__repipe_synth, + .event_type = event__repipe_synth, + .tracing_data = event__repipe_synth, + .build_id = event__repipe_synth, }; extern volatile int session_done; @@ -187,7 +196,7 @@ static int __cmd_inject(void) inject_ops.tracing_data = event__repipe_tracing_data; } - session = perf_session__new(input_name, O_RDONLY, false, true); + session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 31f60a2535e..def7ddc2fd4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data, } } -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event, struct sample_data *sample, + struct perf_session *session) { - struct sample_data data; - struct thread *thread; + struct thread *thread = perf_session__findnew(session, event->ip.pid); - memset(&data, 0, sizeof(data)); - data.time = -1; - data.cpu = -1; - data.period = 1; - - event__parse_sample(event, session->sample_type, &data); - - dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, - data.pid, data.tid, data.ip, data.period); - - thread = perf_session__findnew(session, event->ip.pid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); @@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(event, data.raw_data, data.cpu, - data.time, thread); + process_raw_event(event, sample->raw_data, sample->cpu, + sample->time, thread); return 0; } @@ -492,7 +481,8 @@ static void sort_result(void) static int __cmd_kmem(void) { int err = -EINVAL; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); + struct perf_session *session = perf_session__new(input_name, O_RDONLY, + 0, false, &event_ops); if (session == NULL) return -ENOMEM; @@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv) rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv == NULL) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 821c1586a22..b9c6e543297 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -834,22 +834,18 @@ static void dump_info(void) die("Unknown type of information\n"); } -static int process_sample_event(event_t *self, struct perf_session *s) +static int process_sample_event(event_t *self, struct sample_data *sample, + struct perf_session *s) { - struct sample_data data; - struct thread *thread; + struct thread *thread = perf_session__findnew(s, sample->tid); - bzero(&data, sizeof(data)); - event__parse_sample(self, s->sample_type, &data); - - thread = perf_session__findnew(s, data.tid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", self->header.type); return -1; } - process_raw_event(data.raw_data, data.cpu, data.time, thread); + process_raw_event(sample->raw_data, sample->cpu, sample->time, thread); return 0; } @@ -862,7 +858,7 @@ static struct perf_event_ops eops = { static int read_events(void) { - session = perf_session__new(input_name, O_RDONLY, 0, false); + session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); if (!session) die("Initializing perf session failed\n"); @@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv) rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv == NULL) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); @@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) usage_with_options(report_usage, report_options); } __cmd_report(); - } else if (!strcmp(argv[0], "trace")) { - /* Aliased to 'perf trace' */ - return cmd_trace(argc, argv, prefix); + } else if (!strcmp(argv[0], "script")) { + /* Aliased to 'perf script' */ + return cmd_script(argc, argv, prefix); } else if (!strcmp(argv[0], "info")) { if (argc) { argc = parse_options(argc, argv, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 564491fa18b..50efbd509b8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -36,6 +36,7 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; static u64 user_interval = ULLONG_MAX; static u64 default_interval = 0; +static u64 sample_type; static int nr_cpus = 0; static unsigned int page_size; @@ -48,6 +49,7 @@ static const char *output_name = "perf.data"; static int group = 0; static int realtime_prio = 0; static bool raw_samples = false; +static bool sample_id_all_avail = true; static bool system_wide = false; static pid_t target_pid = -1; static pid_t target_tid = -1; @@ -60,7 +62,9 @@ static bool call_graph = false; static bool inherit_stat = false; static bool no_samples = false; static bool sample_address = false; +static bool sample_time = false; static bool no_buildid = false; +static bool no_buildid_cache = false; static long samples = 0; static u64 bytes_written = 0; @@ -128,6 +132,7 @@ static void write_output(void *buf, size_t size) } static int process_synthesized_event(event_t *event, + struct sample_data *sample __used, struct perf_session *self __used) { write_output(event, event->header.size); @@ -238,6 +243,19 @@ static void create_counter(int counter, int cpu) u64 time_running; u64 id; } read_data; + /* + * Check if parse_single_tracepoint_event has already asked for + * PERF_SAMPLE_TIME. + * + * XXX this is kludgy but short term fix for problems introduced by + * eac23d1c that broke 'perf script' by having different sample_types + * when using multiple tracepoint events when we use a perf binary + * that tries to use sample_id_all on an older kernel. + * + * We need to move counter creation to perf_session, support + * different sample_types, etc. + */ + bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | @@ -280,6 +298,10 @@ static void create_counter(int counter, int cpu) if (system_wide) attr->sample_type |= PERF_SAMPLE_CPU; + if (sample_id_all_avail && + (sample_time || system_wide || !no_inherit || cpu_list)) + attr->sample_type |= PERF_SAMPLE_TIME; + if (raw_samples) { attr->sample_type |= PERF_SAMPLE_TIME; attr->sample_type |= PERF_SAMPLE_RAW; @@ -293,6 +315,8 @@ static void create_counter(int counter, int cpu) attr->disabled = 1; attr->enable_on_exec = 1; } +retry_sample_id: + attr->sample_id_all = sample_id_all_avail ? 1 : 0; for (thread_index = 0; thread_index < thread_num; thread_index++) { try_again: @@ -309,6 +333,15 @@ try_again: else if (err == ENODEV && cpu_list) { die("No such device - did you specify" " an out-of-range profile CPU?\n"); + } else if (err == EINVAL && sample_id_all_avail) { + /* + * Old kernel, no attr->sample_id_type_all field + */ + sample_id_all_avail = false; + if (!sample_time && !raw_samples && !time_needed) + attr->sample_type &= ~PERF_SAMPLE_TIME; + + goto retry_sample_id; } /* @@ -326,7 +359,7 @@ try_again: goto try_again; } printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", + error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", fd[nr_cpu][counter][thread_index], strerror(err)); #if defined(__i386__) || defined(__x86_64__) @@ -406,6 +439,9 @@ try_again: } } } + + if (!sample_type) + sample_type = attr->sample_type; } static void open_counters(int cpu) @@ -437,7 +473,8 @@ static void atexit_header(void) if (!pipe_output) { session->header.data_size += bytes_written; - process_buildids(); + if (!no_buildid) + process_buildids(); perf_header__write(&session->header, output, true); perf_session__delete(session); symbol__exit(); @@ -552,12 +589,15 @@ static int __cmd_record(int argc, const char **argv) } session = perf_session__new(output_name, O_WRONLY, - write_mode == WRITE_FORCE, false); + write_mode == WRITE_FORCE, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; } + if (!no_buildid) + perf_header__set_feat(&session->header, HEADER_BUILD_ID); + if (!file_new) { err = perf_header__read(session, output); if (err < 0) @@ -639,6 +679,8 @@ static int __cmd_record(int argc, const char **argv) open_counters(cpumap[i]); } + perf_session__set_sample_type(session, sample_type); + if (pipe_output) { err = perf_header__write_pipe(output); if (err < 0) @@ -651,6 +693,8 @@ static int __cmd_record(int argc, const char **argv) post_processing_offset = lseek(output, 0, SEEK_CUR); + perf_session__set_sample_id_all(session, sample_id_all_avail); + if (pipe_output) { err = event__synthesize_attrs(&session->header, process_synthesized_event, @@ -831,10 +875,13 @@ const struct option record_options[] = { "per thread counts"), OPT_BOOLEAN('d', "data", &sample_address, "Sample addresses"), + OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), OPT_BOOLEAN('n', "no-samples", &no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid, + OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, "do not update the buildid cache"), + OPT_BOOLEAN('B', "no-buildid", &no_buildid, + "do not collect buildids in perf.data"), OPT_END() }; @@ -859,7 +906,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) } symbol__init(); - if (no_buildid) + + if (no_buildid_cache || no_buildid) disable_buildid_cache(); if (!nr_counters) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5de405d4523..75183a4518e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session, return 0; } -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event, struct sample_data *sample, + struct perf_session *session) { - struct sample_data data = { .period = 1, }; struct addr_location al; struct perf_event_attr *attr; - if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { + if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session) if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; - if (perf_session__add_hist_entry(session, &al, &data)) { + if (perf_session__add_hist_entry(session, &al, sample)) { pr_debug("problem incrementing symbol period, skipping event\n"); return -1; } - attr = perf_header__find_attr(data.id, &session->header); + attr = perf_header__find_attr(sample->id, &session->header); - if (add_event_total(session, &data, attr)) { + if (add_event_total(session, sample, attr)) { pr_debug("problem adding event period\n"); return -1; } @@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session) return 0; } -static int process_read_event(event_t *event, struct perf_session *session __used) +static int process_read_event(event_t *event, struct sample_data *sample __used, + struct perf_session *session __used) { struct perf_event_attr *attr; @@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = { .event_type = event__process_event_type, .tracing_data = event__process_tracing_data, .build_id = event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; @@ -307,7 +310,7 @@ static int __cmd_report(void) signal(SIGINT, sig_handler); - session = perf_session__new(input_name, O_RDONLY, force, false); + session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); if (session == NULL) return -ENOMEM; @@ -442,6 +445,8 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), @@ -478,6 +483,8 @@ static const struct option options[] = { "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, "Only display entries resolved to a symbol"), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), OPT_END() }; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 55f3b5dcc73..7a4ebeb8b01 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session, process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); } -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event, struct sample_data *sample, + struct perf_session *session) { - struct sample_data data; struct thread *thread; if (!(session->sample_type & PERF_SAMPLE_RAW)) return 0; - memset(&data, 0, sizeof(data)); - data.time = -1; - data.cpu = -1; - data.period = -1; - - event__parse_sample(event, session->sample_type, &data); - - dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, - data.pid, data.tid, data.ip, data.period); - - thread = perf_session__findnew(session, data.pid); + thread = perf_session__findnew(session, sample->pid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); @@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session) dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (profile_cpu != -1 && profile_cpu != (int)data.cpu) + if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) return 0; - process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread); + process_raw_event(event, session, sample->raw_data, sample->cpu, + sample->time, thread); return 0; } @@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = { static int read_events(void) { int err = -EINVAL; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); + struct perf_session *session = perf_session__new(input_name, O_RDONLY, + 0, false, &event_ops); if (session == NULL) return -ENOMEM; @@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv) rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); @@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) usage_with_options(sched_usage, sched_options); /* - * Aliased to 'perf trace' for now: + * Aliased to 'perf script' for now: */ - if (!strcmp(argv[0], "trace")) - return cmd_trace(argc, argv, prefix); + if (!strcmp(argv[0], "script")) + return cmd_script(argc, argv, prefix); symbol__init(); if (!strncmp(argv[0], "rec", 3)) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c index 86cfe3800e6..43480fd66db 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-script.c @@ -56,29 +56,18 @@ static void setup_scripting(void) static int cleanup_scripting(void) { - pr_debug("\nperf trace script stopped\n"); + pr_debug("\nperf script stopped\n"); return scripting_ops->stop_script(); } static char const *input_name = "perf.data"; -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event, struct sample_data *sample, + struct perf_session *session) { - struct sample_data data; - struct thread *thread; + struct thread *thread = perf_session__findnew(session, event->ip.pid); - memset(&data, 0, sizeof(data)); - data.time = -1; - data.cpu = -1; - data.period = 1; - - event__parse_sample(event, session->sample_type, &data); - - dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, - data.pid, data.tid, data.ip, data.period); - - thread = perf_session__findnew(session, event->ip.pid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); @@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session) if (session->sample_type & PERF_SAMPLE_RAW) { if (debug_mode) { - if (data.time < last_timestamp) { + if (sample->time < last_timestamp) { pr_err("Samples misordered, previous: %llu " "this: %llu\n", last_timestamp, - data.time); + sample->time); nr_unordered++; } - last_timestamp = data.time; + last_timestamp = sample->time; return 0; } /* @@ -101,18 +90,19 @@ static int process_sample_event(event_t *event, struct perf_session *session) * field, although it should be the same than this perf * event pid */ - scripting_ops->process_event(data.cpu, data.raw_data, - data.raw_size, - data.time, thread->comm); + scripting_ops->process_event(sample->cpu, sample->raw_data, + sample->raw_size, + sample->time, thread->comm); } - session->hists.stats.total_period += data.period; + session->hists.stats.total_period += sample->period; return 0; } static u64 nr_lost; -static int process_lost_event(event_t *event, struct perf_session *session __used) +static int process_lost_event(event_t *event, struct sample_data *sample __used, + struct perf_session *session __used) { nr_lost += event->lost.lost; @@ -127,6 +117,7 @@ static struct perf_event_ops event_ops = { .tracing_data = event__process_tracing_data, .build_id = event__process_build_id, .lost = process_lost_event, + .ordering_requires_timestamps = true, .ordered_samples = true, }; @@ -137,7 +128,7 @@ static void sig_handler(int sig __unused) session_done = 1; } -static int __cmd_trace(struct perf_session *session) +static int __cmd_script(struct perf_session *session) { int ret; @@ -247,7 +238,7 @@ static void list_available_languages(void) fprintf(stderr, "\n"); fprintf(stderr, "Scripting language extensions (used in " - "perf trace -s [spec:]script.[spec]):\n\n"); + "perf script -s [spec:]script.[spec]):\n\n"); list_for_each_entry(s, &script_specs, node) fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); @@ -301,17 +292,34 @@ static int parse_scriptname(const struct option *opt __used, return 0; } -#define for_each_lang(scripts_dir, lang_dirent, lang_next) \ +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +static int is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return 0; + + return S_ISDIR(st.st_mode); +} + +#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\ while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ lang_next) \ - if (lang_dirent.d_type == DT_DIR && \ + if ((lang_dirent.d_type == DT_DIR || \ + (lang_dirent.d_type == DT_UNKNOWN && \ + is_directory(scripts_path, &lang_dirent))) && \ (strcmp(lang_dirent.d_name, ".")) && \ (strcmp(lang_dirent.d_name, ".."))) -#define for_each_script(lang_dir, script_dirent, script_next) \ +#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\ while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ script_next) \ - if (script_dirent.d_type != DT_DIR) + if (script_dirent.d_type != DT_DIR && \ + (script_dirent.d_type != DT_UNKNOWN || \ + !is_directory(lang_path, &script_dirent))) #define RECORD_SUFFIX "-record" @@ -380,10 +388,10 @@ out_delete_desc: return NULL; } -static char *ends_with(char *str, const char *suffix) +static const char *ends_with(const char *str, const char *suffix) { size_t suffix_len = strlen(suffix); - char *p = str; + const char *p = str; if (strlen(str) > suffix_len) { p = str + strlen(str) - suffix_len; @@ -466,16 +474,16 @@ static int list_available_scripts(const struct option *opt __used, if (!scripts_dir) return -1; - for_each_lang(scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, lang_dirent.d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_dir, script_dirent, script_next) { + for_each_script(lang_path, lang_dir, script_dirent, script_next) { script_root = strdup(script_dirent.d_name); - str = ends_with(script_root, REPORT_SUFFIX); + str = (char *)ends_with(script_root, REPORT_SUFFIX); if (str) { *str = '\0'; desc = script_desc__findnew(script_root); @@ -514,16 +522,16 @@ static char *get_script_path(const char *script_root, const char *suffix) if (!scripts_dir) return NULL; - for_each_lang(scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, lang_dirent.d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_dir, script_dirent, script_next) { + for_each_script(lang_path, lang_dir, script_dirent, script_next) { __script_root = strdup(script_dirent.d_name); - str = ends_with(__script_root, suffix); + str = (char *)ends_with(__script_root, suffix); if (str) { *str = '\0'; if (strcmp(__script_root, script_root)) @@ -543,7 +551,7 @@ static char *get_script_path(const char *script_root, const char *suffix) static bool is_top_script(const char *script_path) { - return ends_with((char *)script_path, "top") == NULL ? false : true; + return ends_with(script_path, "top") == NULL ? false : true; } static int has_required_arg(char *script_path) @@ -569,12 +577,12 @@ out: return n_args; } -static const char * const trace_usage[] = { - "perf trace [<options>]", - "perf trace [<options>] record <script> [<record-options>] <command>", - "perf trace [<options>] report <script> [script-args]", - "perf trace [<options>] <script> [<record-options>] <command>", - "perf trace [<options>] <top-script> [script-args]", +static const char * const script_usage[] = { + "perf script [<options>]", + "perf script [<options>] record <script> [<record-options>] <command>", + "perf script [<options>] report <script> [script-args]", + "perf script [<options>] <script> [<record-options>] <command>", + "perf script [<options>] <top-script> [script-args]", NULL }; @@ -591,7 +599,7 @@ static const struct option options[] = { "script file name (lang:script name, script name, or *)", parse_scriptname), OPT_STRING('g', "gen-script", &generate_script_lang, "lang", - "generate perf-trace.xx script in specified language"), + "generate perf-script.xx script in specified language"), OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_BOOLEAN('d', "debug-mode", &debug_mode, @@ -614,7 +622,7 @@ static bool have_cmd(int argc, const char **argv) return argc != 0; } -int cmd_trace(int argc, const char **argv, const char *prefix __used) +int cmd_script(int argc, const char **argv, const char *prefix __used) { char *rec_script_path = NULL; char *rep_script_path = NULL; @@ -626,7 +634,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) setup_scripting(); - argc = parse_options(argc, argv, options, trace_usage, + argc = parse_options(argc, argv, options, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { @@ -640,7 +648,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) if (!rep_script_path) { fprintf(stderr, "Please specify a valid report script" - "(see 'perf trace -l' for listing)\n"); + "(see 'perf script -l' for listing)\n"); return -1; } } @@ -658,8 +666,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) if (!rec_script_path && !rep_script_path) { fprintf(stderr, " Couldn't find script %s\n\n See perf" - " trace -l for available scripts.\n", argv[0]); - usage_with_options(trace_usage, options); + " script -l for available scripts.\n", argv[0]); + usage_with_options(script_usage, options); } if (is_top_script(argv[0])) { @@ -671,9 +679,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) rec_args = (argc - 1) - rep_args; if (rec_args < 0) { fprintf(stderr, " %s script requires options." - "\n\n See perf trace -l for available " + "\n\n See perf script -l for available " "scripts and options.\n", argv[0]); - usage_with_options(trace_usage, options); + usage_with_options(script_usage, options); } } @@ -772,7 +780,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) if (!script_name) setup_pager(); - session = perf_session__new(input_name, O_RDONLY, 0, false); + session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops); if (session == NULL) return -ENOMEM; @@ -806,7 +814,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) return -1; } - err = scripting_ops->generate_script("perf-trace"); + err = scripting_ops->generate_script("perf-script"); goto out; } @@ -814,10 +822,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) err = scripting_ops->start_script(script_name, argc, argv); if (err) goto out; - pr_debug("perf trace started with script %s\n\n", script_name); + pr_debug("perf script started with script %s\n\n", script_name); } - err = __cmd_trace(session); + err = __cmd_script(session); perf_session__delete(session); cleanup_scripting(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a6b4d44f950..7ff746da7e6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -52,6 +52,8 @@ #include <math.h> #include <locale.h> +#define DEFAULT_SEPARATOR " " + static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, @@ -75,20 +77,30 @@ static int run_idx = 0; static int run_count = 1; static bool no_inherit = false; static bool scale = true; +static bool no_aggr = false; static pid_t target_pid = -1; static pid_t target_tid = -1; static pid_t *all_tids = NULL; static int thread_num = 0; static pid_t child_pid = -1; static bool null_run = false; -static bool big_num = false; +static bool big_num = true; +static int big_num_opt = -1; static const char *cpu_list; +static const char *csv_sep = NULL; +static bool csv_output = false; static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; static int event_scaled[MAX_COUNTERS]; +static struct { + u64 val; + u64 ena; + u64 run; +} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS]; + static volatile int done = 0; struct stats @@ -136,19 +148,19 @@ static double stddev_stats(struct stats *stats) } struct stats event_res_stats[MAX_COUNTERS][3]; -struct stats runtime_nsecs_stats; +struct stats runtime_nsecs_stats[MAX_NR_CPUS]; +struct stats runtime_cycles_stats[MAX_NR_CPUS]; +struct stats runtime_branches_stats[MAX_NR_CPUS]; struct stats walltime_nsecs_stats; -struct stats runtime_cycles_stats; -struct stats runtime_branches_stats; #define MATCH_EVENT(t, c, counter) \ (attrs[counter].type == PERF_TYPE_##t && \ attrs[counter].config == PERF_COUNT_##c) #define ERR_PERF_OPEN \ -"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" +"counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information." -static int create_perf_stat_counter(int counter) +static int create_perf_stat_counter(int counter, bool *perm_err) { struct perf_event_attr *attr = attrs + counter; int thread; @@ -164,11 +176,14 @@ static int create_perf_stat_counter(int counter) for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter][0] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); - if (fd[cpu][counter][0] < 0) - pr_debug(ERR_PERF_OPEN, counter, + if (fd[cpu][counter][0] < 0) { + if (errno == EPERM || errno == EACCES) + *perm_err = true; + error(ERR_PERF_OPEN, counter, fd[cpu][counter][0], strerror(errno)); - else + } else { ++ncreated; + } } } else { attr->inherit = !no_inherit; @@ -179,12 +194,15 @@ static int create_perf_stat_counter(int counter) for (thread = 0; thread < thread_num; thread++) { fd[0][counter][thread] = sys_perf_event_open(attr, all_tids[thread], -1, -1, 0); - if (fd[0][counter][thread] < 0) - pr_debug(ERR_PERF_OPEN, counter, + if (fd[0][counter][thread] < 0) { + if (errno == EPERM || errno == EACCES) + *perm_err = true; + error(ERR_PERF_OPEN, counter, fd[0][counter][thread], strerror(errno)); - else + } else { ++ncreated; + } } } @@ -205,8 +223,9 @@ static inline int nsec_counter(int counter) /* * Read out the results of a single counter: + * aggregate counts across CPUs in system-wide mode */ -static void read_counter(int counter) +static void read_counter_aggr(int counter) { u64 count[3], single_count[3]; int cpu; @@ -264,11 +283,58 @@ static void read_counter(int counter) * Save the full runtime - to allow normalization during printout: */ if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) - update_stats(&runtime_nsecs_stats, count[0]); + update_stats(&runtime_nsecs_stats[0], count[0]); if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) - update_stats(&runtime_cycles_stats, count[0]); + update_stats(&runtime_cycles_stats[0], count[0]); if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) - update_stats(&runtime_branches_stats, count[0]); + update_stats(&runtime_branches_stats[0], count[0]); +} + +/* + * Read out the results of a single counter: + * do not aggregate counts across CPUs in system-wide mode + */ +static void read_counter(int counter) +{ + u64 count[3]; + int cpu; + size_t res, nv; + + count[0] = count[1] = count[2] = 0; + + nv = scale ? 3 : 1; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + + if (fd[cpu][counter][0] < 0) + continue; + + res = read(fd[cpu][counter][0], count, nv * sizeof(u64)); + + assert(res == nv * sizeof(u64)); + + close(fd[cpu][counter][0]); + fd[cpu][counter][0] = -1; + + if (scale) { + if (count[2] == 0) { + count[0] = 0; + } else if (count[2] < count[1]) { + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } + } + cpu_counts[cpu][counter].val = count[0]; /* scaled count */ + cpu_counts[cpu][counter].ena = count[1]; + cpu_counts[cpu][counter].run = count[2]; + + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) + update_stats(&runtime_nsecs_stats[cpu], count[0]); + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) + update_stats(&runtime_cycles_stats[cpu], count[0]); + if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) + update_stats(&runtime_branches_stats[cpu], count[0]); + } } static int run_perf_stat(int argc __used, const char **argv) @@ -277,6 +343,7 @@ static int run_perf_stat(int argc __used, const char **argv) int status = 0; int counter, ncreated = 0; int child_ready_pipe[2], go_pipe[2]; + bool perm_err = false; const bool forks = (argc > 0); char buf; @@ -335,12 +402,15 @@ static int run_perf_stat(int argc __used, const char **argv) } for (counter = 0; counter < nr_counters; counter++) - ncreated += create_perf_stat_counter(counter); - - if (ncreated == 0) { - pr_err("No permission to collect %sstats.\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", - system_wide ? "system-wide " : ""); + ncreated += create_perf_stat_counter(counter, &perm_err); + + if (ncreated < nr_counters) { + if (perm_err) + error("You may not have permission to collect %sstats.\n" + "\t Consider tweaking" + " /proc/sys/kernel/perf_event_paranoid or running as root.", + system_wide ? "system-wide " : ""); + die("Not all events could be opened.\n"); if (child_pid != -1) kill(child_pid, SIGTERM); return -1; @@ -362,9 +432,13 @@ static int run_perf_stat(int argc __used, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); - + if (no_aggr) { + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + } else { + for (counter = 0; counter < nr_counters; counter++) + read_counter_aggr(counter); + } return WEXITSTATUS(status); } @@ -377,11 +451,21 @@ static void print_noise(int counter, double avg) 100 * stddev_stats(&event_res_stats[counter][0]) / avg); } -static void nsec_printout(int counter, double avg) +static void nsec_printout(int cpu, int counter, double avg) { double msecs = avg / 1e6; + char cpustr[16] = { '\0', }; + const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; + + if (no_aggr) + sprintf(cpustr, "CPU%*d%s", + csv_output ? 0 : -4, + cpumap[cpu], csv_sep); - fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); + fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(counter)); + + if (csv_output) + return; if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { fprintf(stderr, " # %10.3f CPUs ", @@ -389,33 +473,49 @@ static void nsec_printout(int counter, double avg) } } -static void abs_printout(int counter, double avg) +static void abs_printout(int cpu, int counter, double avg) { double total, ratio = 0.0; + char cpustr[16] = { '\0', }; + const char *fmt; + + if (csv_output) + fmt = "%s%.0f%s%s"; + else if (big_num) + fmt = "%s%'18.0f%s%-24s"; + else + fmt = "%s%18.0f%s%-24s"; - if (big_num) - fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); + if (no_aggr) + sprintf(cpustr, "CPU%*d%s", + csv_output ? 0 : -4, + cpumap[cpu], csv_sep); else - fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); + cpu = 0; + + fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(counter)); + + if (csv_output) + return; if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { - total = avg_stats(&runtime_cycles_stats); + total = avg_stats(&runtime_cycles_stats[cpu]); if (total) ratio = avg / total; fprintf(stderr, " # %10.3f IPC ", ratio); } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && - runtime_branches_stats.n != 0) { - total = avg_stats(&runtime_branches_stats); + runtime_branches_stats[cpu].n != 0) { + total = avg_stats(&runtime_branches_stats[cpu]); if (total) ratio = avg * 100 / total; fprintf(stderr, " # %10.3f %% ", ratio); - } else if (runtime_nsecs_stats.n != 0) { - total = avg_stats(&runtime_nsecs_stats); + } else if (runtime_nsecs_stats[cpu].n != 0) { + total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) ratio = 1000.0 * avg / total; @@ -426,22 +526,29 @@ static void abs_printout(int counter, double avg) /* * Print out the results of a single counter: + * aggregated counts in system-wide mode */ -static void print_counter(int counter) +static void print_counter_aggr(int counter) { double avg = avg_stats(&event_res_stats[counter][0]); int scaled = event_scaled[counter]; if (scaled == -1) { - fprintf(stderr, " %18s %-24s\n", - "<not counted>", event_name(counter)); + fprintf(stderr, "%*s%s%-24s\n", + csv_output ? 0 : 18, + "<not counted>", csv_sep, event_name(counter)); return; } if (nsec_counter(counter)) - nsec_printout(counter, avg); + nsec_printout(-1, counter, avg); else - abs_printout(counter, avg); + abs_printout(-1, counter, avg); + + if (csv_output) { + fputc('\n', stderr); + return; + } print_noise(counter, avg); @@ -458,40 +565,91 @@ static void print_counter(int counter) fprintf(stderr, "\n"); } +/* + * Print out the results of a single counter: + * does not use aggregated count in system-wide + */ +static void print_counter(int counter) +{ + u64 ena, run, val; + int cpu; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + val = cpu_counts[cpu][counter].val; + ena = cpu_counts[cpu][counter].ena; + run = cpu_counts[cpu][counter].run; + if (run == 0 || ena == 0) { + fprintf(stderr, "CPU%*d%s%*s%s%-24s", + csv_output ? 0 : -4, + cpumap[cpu], csv_sep, + csv_output ? 0 : 18, + "<not counted>", csv_sep, + event_name(counter)); + + fprintf(stderr, "\n"); + continue; + } + + if (nsec_counter(counter)) + nsec_printout(cpu, counter, val); + else + abs_printout(cpu, counter, val); + + if (!csv_output) { + print_noise(counter, 1.0); + + if (run != ena) { + fprintf(stderr, " (scaled from %.2f%%)", + 100.0 * run / ena); + } + } + fprintf(stderr, "\n"); + } +} + static void print_stat(int argc, const char **argv) { int i, counter; fflush(stdout); - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for "); - if(target_pid == -1 && target_tid == -1) { - fprintf(stderr, "\'%s", argv[0]); - for (i = 1; i < argc; i++) - fprintf(stderr, " %s", argv[i]); - } else if (target_pid != -1) - fprintf(stderr, "process id \'%d", target_pid); - else - fprintf(stderr, "thread id \'%d", target_tid); - - fprintf(stderr, "\'"); - if (run_count > 1) - fprintf(stderr, " (%d runs)", run_count); - fprintf(stderr, ":\n\n"); + if (!csv_output) { + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for "); + if(target_pid == -1 && target_tid == -1) { + fprintf(stderr, "\'%s", argv[0]); + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + } else if (target_pid != -1) + fprintf(stderr, "process id \'%d", target_pid); + else + fprintf(stderr, "thread id \'%d", target_tid); + + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); + } - for (counter = 0; counter < nr_counters; counter++) - print_counter(counter); + if (no_aggr) { + for (counter = 0; counter < nr_counters; counter++) + print_counter(counter); + } else { + for (counter = 0; counter < nr_counters; counter++) + print_counter_aggr(counter); + } - fprintf(stderr, "\n"); - fprintf(stderr, " %18.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); - if (run_count > 1) { - fprintf(stderr, " ( +- %7.3f%% )", + if (!csv_output) { + fprintf(stderr, "\n"); + fprintf(stderr, " %18.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", 100*stddev_stats(&walltime_nsecs_stats) / avg_stats(&walltime_nsecs_stats)); + } + fprintf(stderr, "\n\n"); } - fprintf(stderr, "\n\n"); } static volatile int signr = -1; @@ -521,6 +679,13 @@ static const char * const stat_usage[] = { NULL }; +static int stat__set_big_num(const struct option *opt __used, + const char *s __used, int unset) +{ + big_num_opt = unset ? 0 : 1; + return 0; +} + static const struct option options[] = { OPT_CALLBACK('e', "event", NULL, "event", "event selector. use 'perf list' to list available events", @@ -541,10 +706,15 @@ static const struct option options[] = { "repeat command and print average + stddev (max: 100)"), OPT_BOOLEAN('n', "null", &null_run, "null run - dont start any counters"), - OPT_BOOLEAN('B', "big-num", &big_num, - "print large numbers with thousands\' separators"), + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + "print large numbers with thousands\' separators", + stat__set_big_num), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to monitor in system-wide"), + OPT_BOOLEAN('A', "no-aggr", &no_aggr, + "disable CPU count aggregation"), + OPT_STRING('x', "field-separator", &csv_sep, "separator", + "print counts with custom separator"), OPT_END() }; @@ -557,11 +727,34 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + + if (csv_sep) + csv_output = true; + else + csv_sep = DEFAULT_SEPARATOR; + + /* + * let the spreadsheet do the pretty-printing + */ + if (csv_output) { + /* User explicitely passed -B? */ + if (big_num_opt == 1) { + fprintf(stderr, "-B option not supported with -x\n"); + usage_with_options(stat_usage, options); + } else /* Nope, so disable big number formatting */ + big_num = false; + } else if (big_num_opt == 0) /* User passed --no-big-num */ + big_num = false; + if (!argc && target_pid == -1 && target_tid == -1) usage_with_options(stat_usage, options); if (run_count <= 0) usage_with_options(stat_usage, options); + /* no_aggr is for system-wide only */ + if (no_aggr && !system_wide) + usage_with_options(stat_usage, options); + /* Set attrs and nr_counters if no event is selected and !null_run */ if (!null_run && !nr_counters) { memcpy(attrs, default_attrs, sizeof(default_attrs)); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 035b9fa063a..e0c3f471f22 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void) * end addresses too. */ for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { - struct symbol *pair; + struct symbol *pair, *first_pair; + bool backwards = true; sym = rb_entry(nd, struct symbol, rb_node); - pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL); + + if (sym->start == sym->end) + continue; + + first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL); + pair = first_pair; if (pair && pair->start == sym->start) { next_pair: @@ -143,8 +149,10 @@ next_pair: pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n", sym->start, sym->name, sym->end, pair->end); } else { - struct rb_node *nnd = rb_prev(&pair->rb_node); - + struct rb_node *nnd; +detour: + nnd = backwards ? rb_prev(&pair->rb_node) : + rb_next(&pair->rb_node); if (nnd) { struct symbol *next = rb_entry(nnd, struct symbol, rb_node); @@ -153,6 +161,13 @@ next_pair: goto next_pair; } } + + if (backwards) { + backwards = false; + pair = first_pair; + goto detour; + } + pr_debug("%#Lx: diff name v: %s k: %s\n", sym->start, sym->name, pair->name); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9bcc38f0b70..d75084bccdb 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -272,19 +272,22 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(event_t *event, struct perf_session *session __used) +static int process_comm_event(event_t *event, struct sample_data *sample __used, + struct perf_session *session __used) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(event_t *event, struct perf_session *session __used) +static int process_fork_event(event_t *event, struct sample_data *sample __used, + struct perf_session *session __used) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(event_t *event, struct perf_session *session __used) +static int process_exit_event(event_t *event, struct sample_data *sample __used, + struct perf_session *session __used) { pid_exit(event->fork.pid, event->fork.time); return 0; @@ -470,24 +473,21 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) } -static int process_sample_event(event_t *event, struct perf_session *session) +static int process_sample_event(event_t *event __used, + struct sample_data *sample, + struct perf_session *session) { - struct sample_data data; struct trace_entry *te; - memset(&data, 0, sizeof(data)); - - event__parse_sample(event, session->sample_type, &data); - if (session->sample_type & PERF_SAMPLE_TIME) { - if (!first_time || first_time > data.time) - first_time = data.time; - if (last_time < data.time) - last_time = data.time; + if (!first_time || first_time > sample->time) + first_time = sample->time; + if (last_time < sample->time) + last_time = sample->time; } - te = (void *)data.raw_data; - if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) { + te = (void *)sample->raw_data; + if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) { char *event_str; struct power_entry *pe; @@ -499,19 +499,19 @@ static int process_sample_event(event_t *event, struct perf_session *session) return 0; if (strcmp(event_str, "power:power_start") == 0) - c_state_start(pe->cpu_id, data.time, pe->value); + c_state_start(pe->cpu_id, sample->time, pe->value); if (strcmp(event_str, "power:power_end") == 0) - c_state_end(pe->cpu_id, data.time); + c_state_end(pe->cpu_id, sample->time); if (strcmp(event_str, "power:power_frequency") == 0) - p_state_change(pe->cpu_id, data.time, pe->value); + p_state_change(pe->cpu_id, sample->time, pe->value); if (strcmp(event_str, "sched:sched_wakeup") == 0) - sched_wakeup(data.cpu, data.time, data.pid, te); + sched_wakeup(sample->cpu, sample->time, sample->pid, te); if (strcmp(event_str, "sched:sched_switch") == 0) - sched_switch(data.cpu, data.time, te); + sched_switch(sample->cpu, sample->time, te); } return 0; } @@ -937,7 +937,8 @@ static struct perf_event_ops event_ops = { static int __cmd_timechart(void) { - struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); + struct perf_session *session = perf_session__new(input_name, O_RDONLY, + 0, false, &event_ops); int ret = -EINVAL; if (session == NULL) @@ -989,6 +990,9 @@ static int __cmd_record(int argc, const char **argv) rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (rec_argv == NULL) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); @@ -1018,6 +1022,8 @@ static const struct option options[] = { OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), + OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", + "Look for files with symbols relative to this directory"), OPT_END() }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index dd625808c2a..ae15f046c40 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -977,12 +977,12 @@ static int symbol_filter(struct map *map, struct symbol *sym) } static void event__process_sample(const event_t *self, - struct perf_session *session, int counter) + struct sample_data *sample, + struct perf_session *session, int counter) { u64 ip = self->ip.ip; struct sym_entry *syme; struct addr_location al; - struct sample_data data; struct machine *machine; u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -1025,7 +1025,7 @@ static void event__process_sample(const event_t *self, if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) exact_samples++; - if (event__preprocess_sample(self, session, &al, &data, + if (event__preprocess_sample(self, session, &al, sample, symbol_filter) < 0 || al.filtered) return; @@ -1105,6 +1105,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self, unsigned int head = mmap_read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; + struct sample_data sample; int diff; /* @@ -1152,10 +1153,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self, event = &event_copy; } + event__parse_sample(event, self, &sample); if (event->header.type == PERF_RECORD_SAMPLE) - event__process_sample(event, self, md->counter); + event__process_sample(event, &sample, self, md->counter); else - event__process(event, self); + event__process(event, &sample, self); old += size; } @@ -1214,7 +1216,9 @@ try_again: int err = errno; if (err == EPERM || err == EACCES) - die("No permission - are you root?\n"); + die("Permission error - are you root?\n" + "\t Consider tweaking" + " /proc/sys/kernel/perf_event_paranoid.\n"); /* * If it's cycles then fall back to hrtimer * based cpu-clock-tick sw counter, which @@ -1231,7 +1235,7 @@ try_again: goto try_again; } printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", + error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", fd[i][counter][thread_index], strerror(err)); die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); @@ -1268,7 +1272,7 @@ static int __cmd_top(void) * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. */ - struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false); + struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 921245b2858..c7798c7f24e 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix); extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_timechart(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); -extern int cmd_trace(int argc, const char **argv, const char *prefix); +extern int cmd_script(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_probe(int argc, const char **argv, const char *prefix); extern int cmd_kmem(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 949d77fc0b9..16b5088cf8f 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -16,7 +16,7 @@ perf-report mainporcelain common perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common -perf-trace mainporcelain common +perf-script mainporcelain common perf-probe mainporcelain common perf-kmem mainporcelain common perf-lock mainporcelain common diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index b253db634f0..b041ca67a2c 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak @@ -9,8 +9,8 @@ endef ifndef NO_DWARF define SOURCE_DWARF #include <dwarf.h> -#include <libdw.h> -#include <version.h> +#include <elfutils/libdw.h> +#include <elfutils/version.h> #ifndef _ELFUTILS_PREREQ #error #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index cdd6c03f1e1..595d0f4a710 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -323,7 +323,7 @@ static void handle_internal_command(int argc, const char **argv) { "top", cmd_top, 0 }, { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, - { "trace", cmd_trace, 0 }, + { "script", cmd_script, 0 }, { "sched", cmd_sched, 0 }, { "probe", cmd_probe, 0 }, { "kmem", cmd_kmem, 0 }, diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index 01a64ad693f..790ceba6ad3 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -8,7 +8,7 @@ #line 1 "Context.xs" /* - * Context.xs. XS interfaces for perf trace. + * Context.xs. XS interfaces for perf script. * * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> * diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs index 549cf0467d3..c1e2ed1ed34 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -1,5 +1,5 @@ /* - * Context.xs. XS interfaces for perf trace. + * Context.xs. XS interfaces for perf script. * * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> * @@ -23,7 +23,7 @@ #include "perl.h" #include "XSUB.h" #include "../../../perf.h" -#include "../../../util/trace-event.h" +#include "../../../util/script-event.h" MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context PROTOTYPES: ENABLE diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README index 9a970763079..2f0c7f3043e 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/README +++ b/tools/perf/scripts/perl/Perf-Trace-Util/README @@ -1,7 +1,7 @@ Perf-Trace-Util version 0.01 ============================ -This module contains utility functions for use with perf trace. +This module contains utility functions for use with perf script. Core.pm and Util.pm are pure Perl modules; Core.pm contains routines that the core perf support for Perl calls on and should always be @@ -33,7 +33,7 @@ After you do that: INSTALLATION -Building perf with perf trace Perl scripting should install this +Building perf with perf script Perl scripting should install this module in the right place. You should make sure libperl and ExtUtils/Embed.pm are installed first diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm index 6c7f3659cb1..4e2f6039ac9 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm @@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf. =head1 SEE ALSO -Perf (trace) documentation +Perf (script) documentation =head1 AUTHOR diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm index 9df376a9f62..9158458d3ee 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm @@ -163,7 +163,7 @@ sub dump_symbolic_fields __END__ =head1 NAME -Perf::Trace::Core - Perl extension for perf trace +Perf::Trace::Core - Perl extension for perf script =head1 SYNOPSIS @@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace =head1 SEE ALSO -Perf (trace) documentation +Perf (script) documentation =head1 AUTHOR diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm index d94b40c8ac8..05350011462 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm @@ -65,7 +65,7 @@ sub clear_term __END__ =head1 NAME -Perf::Trace::Util - Perl extension for perf trace +Perf::Trace::Util - Perl extension for perf script =head1 SYNOPSIS @@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace =head1 SEE ALSO -Perf (trace) documentation +Perf (script) documentation =head1 AUTHOR diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report index 4028d92dc4a..9f83cc1ad8b 100644 --- a/tools/perf/scripts/perl/bin/failed-syscalls-report +++ b/tools/perf/scripts/perl/bin/failed-syscalls-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then shift fi fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report index ba25f4d41fb..77200b3f310 100644 --- a/tools/perf/scripts/perl/bin/rw-by-file-report +++ b/tools/perf/scripts/perl/bin/rw-by-file-report @@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then fi comm=$1 shift -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm - - - +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report index 641a3f5d085..a27b9f311f9 100644 --- a/tools/perf/scripts/perl/bin/rw-by-pid-report +++ b/tools/perf/scripts/perl/bin/rw-by-pid-report @@ -1,6 +1,3 @@ #!/bin/bash # description: system-wide r/w activity -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl - - - +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report index 4918dba7702..83e11ec2e19 100644 --- a/tools/perf/scripts/perl/bin/rwtop-report +++ b/tools/perf/scripts/perl/bin/rwtop-report @@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then interval=$1 shift fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval - - - +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report index 49052ebcb63..889e8130cca 100644 --- a/tools/perf/scripts/perl/bin/wakeup-latency-report +++ b/tools/perf/scripts/perl/bin/wakeup-latency-report @@ -1,6 +1,3 @@ #!/bin/bash # description: system-wide min/max/avg wakeup latency -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl - - - +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report index df0c65f4ca9..6d91411d248 100644 --- a/tools/perf/scripts/perl/bin/workqueue-stats-report +++ b/tools/perf/scripts/perl/bin/workqueue-stats-report @@ -1,7 +1,3 @@ #!/bin/bash # description: workqueue stats (ins/exe/create/destroy) -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl - - - - +perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl index 4e7dc0a407a..4e7076c2061 100644 --- a/tools/perf/scripts/perl/check-perf-trace.pl +++ b/tools/perf/scripts/perl/check-perf-trace.pl @@ -1,4 +1,4 @@ -# perf trace event handlers, generated by perf trace -g perl +# perf script event handlers, generated by perf script -g perl # (c) 2009, Tom Zanussi <tzanussi@gmail.com> # Licensed under the terms of the GNU GPL License version 2 diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl index 2a39097687b..74844ee2be3 100644 --- a/tools/perf/scripts/perl/rw-by-file.pl +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Util; -my $usage = "perf trace -s rw-by-file.pl <comm>\n"; +my $usage = "perf script -s rw-by-file.pl <comm>\n"; my $for_comm = shift or die $usage; diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl index b84b12699b7..a8eaff5119e 100644 --- a/tools/perf/scripts/perl/workqueue-stats.pl +++ b/tools/perf/scripts/perl/workqueue-stats.pl @@ -10,7 +10,7 @@ # workqueue:workqueue_destruction -e workqueue:workqueue_execution # -e workqueue:workqueue_insertion # -# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl +# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl use 5.010000; use strict; diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 957085dd5d8..315067b8f55 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -1,5 +1,5 @@ /* - * Context.c. Python interfaces for perf trace. + * Context.c. Python interfaces for perf script. * * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> * diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py index aad7525bca1..de7211e4fa4 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py @@ -1,4 +1,4 @@ -# Core.py - Python extension for perf trace, core functions +# Core.py - Python extension for perf script, core functions # # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> # diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py index ae9a56e43e0..fdd92f69905 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py @@ -1,4 +1,4 @@ -# SchedGui.py - Python extension for perf trace, basic GUI code for +# SchedGui.py - Python extension for perf script, basic GUI code for # traces drawing and overview. # # Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com> diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 13cc02b5893..15c8400240f 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -1,4 +1,4 @@ -# Util.py - Python extension for perf trace, miscellaneous utility code +# Util.py - Python extension for perf script, miscellaneous utility code # # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> # diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report index 03587021463..fda5096d0cb 100644 --- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report +++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then shift fi fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report index c8268138fb7..6c44271091a 100644 --- a/tools/perf/scripts/python/bin/futex-contention-report +++ b/tools/perf/scripts/python/bin/futex-contention-report @@ -1,4 +1,4 @@ #!/bin/bash # description: futext contention measurement -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report index 4ad361b3124..8f759291da8 100644 --- a/tools/perf/scripts/python/bin/netdev-times-report +++ b/tools/perf/scripts/python/bin/netdev-times-report @@ -2,4 +2,4 @@ # description: display a process of packet and processing time # args: [tx] [rx] [dev=] [debug] -perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@ +perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@ diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report index df1791f07c2..68b037a1849 100644 --- a/tools/perf/scripts/python/bin/sched-migration-report +++ b/tools/perf/scripts/python/bin/sched-migration-report @@ -1,3 +1,3 @@ #!/bin/bash # description: sched migration overview -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report index 36b409c05e5..c32db294124 100644 --- a/tools/perf/scripts/python/bin/sctop-report +++ b/tools/perf/scripts/python/bin/sctop-report @@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then interval=$1 shift fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report index 4eb88c9fc83..16eb8d65c54 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report +++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then shift fi fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report index cb2f9c5cf17..0f0e9d453bb 100644 --- a/tools/perf/scripts/python/bin/syscall-counts-report +++ b/tools/perf/scripts/python/bin/syscall-counts-report @@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then shift fi fi -perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py index d9f7893e315..4647a7694cf 100644 --- a/tools/perf/scripts/python/check-perf-trace.py +++ b/tools/perf/scripts/python/check-perf-trace.py @@ -1,4 +1,4 @@ -# perf trace event handlers, generated by perf trace -g python +# perf script event handlers, generated by perf script -g python # (c) 2010, Tom Zanussi <tzanussi@gmail.com> # Licensed under the terms of the GNU GPL License version 2 # diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py index acd7848717b..85805fac411 100644 --- a/tools/perf/scripts/python/failed-syscalls-by-pid.py +++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py @@ -15,7 +15,7 @@ from perf_trace_context import * from Core import * from Util import * -usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n"; +usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n"; for_comm = None for_pid = None diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index b934383c336..74d55ec08ae 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py @@ -4,7 +4,7 @@ # # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> # -# perf trace event handlers have been generated by perf trace -g python +# perf script event handlers have been generated by perf script -g python # # This software is distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py index 7a6ec2c7d8a..42c267e292f 100644 --- a/tools/perf/scripts/python/sctop.py +++ b/tools/perf/scripts/python/sctop.py @@ -17,7 +17,7 @@ from perf_trace_context import * from Core import * from Util import * -usage = "perf trace -s sctop.py [comm] [interval]\n"; +usage = "perf script -s sctop.py [comm] [interval]\n"; for_comm = None default_interval = 3 diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py index d1ee3ec10cf..c64d1c55d74 100644 --- a/tools/perf/scripts/python/syscall-counts-by-pid.py +++ b/tools/perf/scripts/python/syscall-counts-by-pid.py @@ -14,7 +14,7 @@ from perf_trace_context import * from Core import * from Util import syscall_name -usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; +usage = "perf script -s syscall-counts-by-pid.py [comm]\n"; for_comm = None for_pid = None diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py index ea183dc82d2..b435d3f188e 100644 --- a/tools/perf/scripts/python/syscall-counts.py +++ b/tools/perf/scripts/python/syscall-counts.py @@ -15,7 +15,7 @@ from perf_trace_context import * from Core import * from Util import syscall_name -usage = "perf trace -s syscall-counts.py [comm]\n"; +usage = "perf script -s syscall-counts.py [comm]\n"; for_comm = None diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e437edb7241..deffb8c9607 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -14,7 +14,9 @@ #include <linux/kernel.h> #include "debug.h" -static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) +static int build_id__mark_dso_hit(event_t *event, + struct sample_data *sample __used, + struct perf_session *session) { struct addr_location al; u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) return 0; } -static int event__exit_del_thread(event_t *self, struct perf_session *session) +static int event__exit_del_thread(event_t *self, struct sample_data *sample __used, + struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->fork.tid); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index c8d81b00089..01bbe8ecec3 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...) return ret; } -static int dump_printf_color(const char *fmt, const char *color, ...) +#ifdef NO_NEWT_SUPPORT +void ui__warning(const char *format, ...) { va_list args; - int ret = 0; - if (dump_trace) { - va_start(args, color); - ret = color_vfprintf(stdout, color, fmt, args); - va_end(args); - } - - return ret; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); } - +#endif void trace_event(event_t *event) { @@ -70,29 +66,29 @@ void trace_event(event_t *event) if (!dump_trace) return; - dump_printf("."); - dump_printf_color("\n. ... raw event: size %d bytes\n", color, - event->header.size); + printf("."); + color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", + event->header.size); for (i = 0; i < event->header.size; i++) { if ((i & 15) == 0) { - dump_printf("."); - dump_printf_color(" %04x: ", color, i); + printf("."); + color_fprintf(stdout, color, " %04x: ", i); } - dump_printf_color(" %02x", color, raw_event[i]); + color_fprintf(stdout, color, " %02x", raw_event[i]); if (((i & 15) == 15) || i == event->header.size-1) { - dump_printf_color(" ", color); + color_fprintf(stdout, color, " "); for (j = 0; j < 15-(i & 15); j++) - dump_printf_color(" ", color); + color_fprintf(stdout, color, " "); for (j = i & ~15; j <= i; j++) { - dump_printf_color("%c", color, - isprint(raw_event[j]) ? - raw_event[j] : '.'); + color_fprintf(stdout, color, "%c", + isprint(raw_event[j]) ? + raw_event[j] : '.'); } - dump_printf_color("\n", color); + color_fprintf(stdout, color, "\n"); } } - dump_printf(".\n"); + printf(".\n"); } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 7b514082bba..ca35fd66b5d 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap); #include "ui/progress.h" #endif +void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); + #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dab9e754a28..2302ec051bb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -7,7 +7,7 @@ #include "strlist.h" #include "thread.h" -const char *event__name[] = { +static const char *event__name[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", [PERF_RECORD_LOST] = "LOST", @@ -22,13 +22,31 @@ const char *event__name[] = { [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", + [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", }; -static pid_t event__synthesize_comm(pid_t pid, int full, +const char *event__get_event_name(unsigned int id) +{ + if (id >= ARRAY_SIZE(event__name)) + return "INVALID"; + if (!event__name[id]) + return "UNKNOWN"; + return event__name[id]; +} + +static struct sample_data synth_sample = { + .pid = -1, + .tid = -1, + .time = -1, + .stream_id = -1, + .cpu = -1, + .period = 1, +}; + +static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full, event__handler_t process, struct perf_session *session) { - event_t ev; char filename[PATH_MAX]; char bf[BUFSIZ]; FILE *fp; @@ -49,34 +67,39 @@ out_race: return 0; } - memset(&ev.comm, 0, sizeof(ev.comm)); - while (!ev.comm.comm[0] || !ev.comm.pid) { - if (fgets(bf, sizeof(bf), fp) == NULL) - goto out_failure; + memset(&event->comm, 0, sizeof(event->comm)); + + while (!event->comm.comm[0] || !event->comm.pid) { + if (fgets(bf, sizeof(bf), fp) == NULL) { + pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); + goto out; + } if (memcmp(bf, "Name:", 5) == 0) { char *name = bf + 5; while (*name && isspace(*name)) ++name; size = strlen(name) - 1; - memcpy(ev.comm.comm, name, size++); + memcpy(event->comm.comm, name, size++); } else if (memcmp(bf, "Tgid:", 5) == 0) { char *tgids = bf + 5; while (*tgids && isspace(*tgids)) ++tgids; - tgid = ev.comm.pid = atoi(tgids); + tgid = event->comm.pid = atoi(tgids); } } - ev.comm.header.type = PERF_RECORD_COMM; + event->comm.header.type = PERF_RECORD_COMM; size = ALIGN(size, sizeof(u64)); - ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); - + memset(event->comm.comm + size, 0, session->id_hdr_size); + event->comm.header.size = (sizeof(event->comm) - + (sizeof(event->comm.comm) - size) + + session->id_hdr_size); if (!full) { - ev.comm.tid = pid; + event->comm.tid = pid; - process(&ev, session); - goto out_fclose; + process(event, &synth_sample, session); + goto out; } snprintf(filename, sizeof(filename), "/proc/%d/task", pid); @@ -91,22 +114,19 @@ out_race: if (*end) continue; - ev.comm.tid = pid; + event->comm.tid = pid; - process(&ev, session); + process(event, &synth_sample, session); } - closedir(tasks); -out_fclose: + closedir(tasks); +out: fclose(fp); - return tgid; -out_failure: - pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); - return -1; + return tgid; } -static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, +static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, event__handler_t process, struct perf_session *session) { @@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, return -1; } + event->header.type = PERF_RECORD_MMAP; + /* + * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c + */ + event->header.misc = PERF_RECORD_MISC_USER; + while (1) { char bf[BUFSIZ], *pbf = bf; - event_t ev = { - .header = { - .type = PERF_RECORD_MMAP, - /* - * Just like the kernel, see __perf_event_mmap - * in kernel/perf_event.c - */ - .misc = PERF_RECORD_MISC_USER, - }, - }; int n; size_t size; if (fgets(bf, sizeof(bf), fp) == NULL) break; /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = hex2u64(pbf, &ev.mmap.start); + n = hex2u64(pbf, &event->mmap.start); if (n < 0) continue; pbf += n + 1; - n = hex2u64(pbf, &ev.mmap.len); + n = hex2u64(pbf, &event->mmap.len); if (n < 0) continue; pbf += n + 3; @@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, continue; pbf += 3; - n = hex2u64(pbf, &ev.mmap.pgoff); + n = hex2u64(pbf, &event->mmap.pgoff); size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ - memcpy(ev.mmap.filename, execname, size); + memcpy(event->mmap.filename, execname, size); size = ALIGN(size, sizeof(u64)); - ev.mmap.len -= ev.mmap.start; - ev.mmap.header.size = (sizeof(ev.mmap) - - (sizeof(ev.mmap.filename) - size)); - ev.mmap.pid = tgid; - ev.mmap.tid = pid; - - process(&ev, session); + event->mmap.len -= event->mmap.start; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, session->id_hdr_size); + event->mmap.header.size += session->id_hdr_size; + event->mmap.pid = tgid; + event->mmap.tid = pid; + + process(event, &synth_sample, session); } } @@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process, { struct rb_node *nd; struct map_groups *kmaps = &machine->kmaps; - u16 misc; + event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); + + if (event == NULL) { + pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } + + event->header.type = PERF_RECORD_MMAP; /* * kernel uses 0 for user space maps, see kernel/perf_event.c * __perf_event_mmap */ if (machine__is_host(machine)) - misc = PERF_RECORD_MISC_KERNEL; + event->header.misc = PERF_RECORD_MISC_KERNEL; else - misc = PERF_RECORD_MISC_GUEST_KERNEL; + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { - event_t ev; size_t size; struct map *pos = rb_entry(nd, struct map, rb_node); @@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process, continue; size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); - memset(&ev, 0, sizeof(ev)); - ev.mmap.header.misc = misc; - ev.mmap.header.type = PERF_RECORD_MMAP; - ev.mmap.header.size = (sizeof(ev.mmap) - - (sizeof(ev.mmap.filename) - size)); - ev.mmap.start = pos->start; - ev.mmap.len = pos->end - pos->start; - ev.mmap.pid = machine->pid; - - memcpy(ev.mmap.filename, pos->dso->long_name, + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, session->id_hdr_size); + event->mmap.header.size += session->id_hdr_size; + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pid = machine->pid; + + memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - process(&ev, session); + process(event, &synth_sample, session); } + free(event); return 0; } -int event__synthesize_thread(pid_t pid, event__handler_t process, - struct perf_session *session) +static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, + pid_t pid, event__handler_t process, + struct perf_session *session) { - pid_t tgid = event__synthesize_comm(pid, 1, process, session); + pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process, + session); if (tgid == -1) return -1; - return event__synthesize_mmap_events(pid, tgid, process, session); + return event__synthesize_mmap_events(mmap_event, pid, tgid, + process, session); +} + +int event__synthesize_thread(pid_t pid, event__handler_t process, + struct perf_session *session) +{ + event_t *comm_event, *mmap_event; + int err = -1; + + comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; + + err = __event__synthesize_thread(comm_event, mmap_event, pid, + process, session); + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; } -void event__synthesize_threads(event__handler_t process, - struct perf_session *session) +int event__synthesize_threads(event__handler_t process, + struct perf_session *session) { DIR *proc; struct dirent dirent, *next; + event_t *comm_event, *mmap_event; + int err = -1; + + comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); + if (comm_event == NULL) + goto out; + + mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); + if (mmap_event == NULL) + goto out_free_comm; proc = opendir("/proc"); + if (proc == NULL) + goto out_free_mmap; while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process, if (*end) /* only interested in proper numerical dirents */ continue; - event__synthesize_thread(pid, process, session); + __event__synthesize_thread(comm_event, mmap_event, pid, + process, session); } closedir(proc); + err = 0; +out_free_mmap: + free(mmap_event); +out_free_comm: + free(comm_event); +out: + return err; } struct process_symbol_args { @@ -260,7 +332,8 @@ struct process_symbol_args { u64 start; }; -static int find_symbol_cb(void *arg, const char *name, char type, u64 start) +static int find_symbol_cb(void *arg, const char *name, char type, + u64 start, u64 end __used) { struct process_symbol_args *args = arg; @@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process, char path[PATH_MAX]; char name_buff[PATH_MAX]; struct map *map; - - event_t ev = { - .header = { - .type = PERF_RECORD_MMAP, - }, - }; + int err; /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older * kernels. */ struct process_symbol_args args = { .name = symbol_name, }; + event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); + + if (event == NULL) { + pr_debug("Not enough memory synthesizing mmap event " + "for kernel modules\n"); + return -1; + } mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); if (machine__is_host(machine)) { @@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process, * kernel uses PERF_RECORD_MISC_USER for user space maps, * see kernel/perf_event.c __perf_event_mmap */ - ev.header.misc = PERF_RECORD_MISC_KERNEL; + event->header.misc = PERF_RECORD_MISC_KERNEL; filename = "/proc/kallsyms"; } else { - ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; if (machine__is_default_guest(machine)) filename = (char *) symbol_conf.default_guest_kallsyms; else { @@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process, return -ENOENT; map = machine->vmlinux_maps[MAP__FUNCTION]; - size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), + size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", mmap_name, symbol_name) + 1; size = ALIGN(size, sizeof(u64)); - ev.mmap.header.size = (sizeof(ev.mmap) - - (sizeof(ev.mmap.filename) - size)); - ev.mmap.pgoff = args.start; - ev.mmap.start = map->start; - ev.mmap.len = map->end - ev.mmap.start; - ev.mmap.pid = machine->pid; - - return process(&ev, session); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size) + session->id_hdr_size); + event->mmap.pgoff = args.start; + event->mmap.start = map->start; + event->mmap.len = map->end - event->mmap.start; + event->mmap.pid = machine->pid; + + err = process(event, &synth_sample, session); + free(event); + + return err; } static void thread__comm_adjust(struct thread *self, struct hists *hists) @@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm, return 0; } -int event__process_comm(event_t *self, struct perf_session *session) +int event__process_comm(event_t *self, struct sample_data *sample __used, + struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->comm.tid); @@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session) return 0; } -int event__process_lost(event_t *self, struct perf_session *session) +int event__process_lost(event_t *self, struct sample_data *sample __used, + struct perf_session *session) { dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); session->hists.stats.total_lost += self->lost.lost; @@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self) * a zero sized synthesized MMAP event for the kernel. */ if (maps[MAP__FUNCTION]->end == 0) - maps[MAP__FUNCTION]->end = ~0UL; + maps[MAP__FUNCTION]->end = ~0ULL; } static int event__process_kernel_mmap(event_t *self, @@ -485,7 +566,8 @@ out_problem: return -1; } -int event__process_mmap(event_t *self, struct perf_session *session) +int event__process_mmap(event_t *self, struct sample_data *sample __used, + struct perf_session *session) { struct machine *machine; struct thread *thread; @@ -526,7 +608,8 @@ out_problem: return 0; } -int event__process_task(event_t *self, struct perf_session *session) +int event__process_task(event_t *self, struct sample_data *sample __used, + struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->fork.tid); struct thread *parent = perf_session__findnew(session, self->fork.ptid); @@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session) return 0; } -int event__process(event_t *event, struct perf_session *session) +int event__process(event_t *event, struct sample_data *sample, + struct perf_session *session) { switch (event->header.type) { case PERF_RECORD_COMM: - event__process_comm(event, session); + event__process_comm(event, sample, session); break; case PERF_RECORD_MMAP: - event__process_mmap(event, session); + event__process_mmap(event, sample, session); break; case PERF_RECORD_FORK: case PERF_RECORD_EXIT: - event__process_task(event, session); + event__process_task(event, sample, session); break; default: break; @@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session, symbol_filter_t filter) { u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - struct thread *thread; - - event__parse_sample(self, session->sample_type, data); - - dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n", - self->header.misc, data->pid, data->tid, data->ip, - data->period, data->cpu); - - if (session->sample_type & PERF_SAMPLE_CALLCHAIN) { - unsigned int i; - - dump_printf("... chain: nr:%Lu\n", data->callchain->nr); + struct thread *thread = perf_session__findnew(session, self->ip.pid); - if (!ip_callchain__valid(data->callchain, self)) { - pr_debug("call-chain problem with event, " - "skipping it.\n"); - goto out_filtered; - } - - if (dump_trace) { - for (i = 0; i < data->callchain->nr; i++) - dump_printf("..... %2d: %016Lx\n", - i, data->callchain->ips[i]); - } - } - thread = perf_session__findnew(session, self->ip.pid); if (thread == NULL) return -1; @@ -766,9 +826,65 @@ out_filtered: return 0; } -int event__parse_sample(const event_t *event, u64 type, struct sample_data *data) +static int event__parse_id_sample(const event_t *event, + struct perf_session *session, + struct sample_data *sample) { - const u64 *array = event->sample.array; + const u64 *array; + u64 type; + + sample->cpu = sample->pid = sample->tid = -1; + sample->stream_id = sample->id = sample->time = -1ULL; + + if (!session->sample_id_all) + return 0; + + array = event->sample.array; + array += ((event->header.size - + sizeof(event->header)) / sizeof(u64)) - 1; + type = session->sample_type; + + if (type & PERF_SAMPLE_CPU) { + u32 *p = (u32 *)array; + sample->cpu = *p; + array--; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + sample->stream_id = *array; + array--; + } + + if (type & PERF_SAMPLE_ID) { + sample->id = *array; + array--; + } + + if (type & PERF_SAMPLE_TIME) { + sample->time = *array; + array--; + } + + if (type & PERF_SAMPLE_TID) { + u32 *p = (u32 *)array; + sample->pid = p[0]; + sample->tid = p[1]; + } + + return 0; +} + +int event__parse_sample(const event_t *event, struct perf_session *session, + struct sample_data *data) +{ + const u64 *array; + u64 type; + + if (event->header.type != PERF_RECORD_SAMPLE) + return event__parse_id_sample(event, session, data); + + array = event->sample.array; + type = session->sample_type; if (type & PERF_SAMPLE_IP) { data->ip = event->ip.ip; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8e790dae702..2b7e91902f1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -85,6 +85,7 @@ struct build_id_event { }; enum perf_user_event_type { /* above any possible kernel type */ + PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, PERF_RECORD_HEADER_EVENT_TYPE = 65, PERF_RECORD_HEADER_TRACING_DATA = 66, @@ -135,12 +136,15 @@ void event__print_totals(void); struct perf_session; -typedef int (*event__handler_t)(event_t *event, struct perf_session *session); +typedef int (*event__handler_synth_t)(event_t *event, + struct perf_session *session); +typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, + struct perf_session *session); int event__synthesize_thread(pid_t pid, event__handler_t process, struct perf_session *session); -void event__synthesize_threads(event__handler_t process, - struct perf_session *session); +int event__synthesize_threads(event__handler_t process, + struct perf_session *session); int event__synthesize_kernel_mmap(event__handler_t process, struct perf_session *session, struct machine *machine, @@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process, struct perf_session *session, struct machine *machine); -int event__process_comm(event_t *self, struct perf_session *session); -int event__process_lost(event_t *self, struct perf_session *session); -int event__process_mmap(event_t *self, struct perf_session *session); -int event__process_task(event_t *self, struct perf_session *session); -int event__process(event_t *event, struct perf_session *session); +int event__process_comm(event_t *self, struct sample_data *sample, + struct perf_session *session); +int event__process_lost(event_t *self, struct sample_data *sample, + struct perf_session *session); +int event__process_mmap(event_t *self, struct sample_data *sample, + struct perf_session *session); +int event__process_task(event_t *self, struct sample_data *sample, + struct perf_session *session); +int event__process(event_t *event, struct sample_data *sample, + struct perf_session *session); struct addr_location; int event__preprocess_sample(const event_t *self, struct perf_session *session, struct addr_location *al, struct sample_data *data, symbol_filter_t filter); -int event__parse_sample(const event_t *event, u64 type, struct sample_data *data); +int event__parse_sample(const event_t *event, struct perf_session *session, + struct sample_data *sample); -extern const char *event__name[]; +const char *event__get_event_name(unsigned int id); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7cba0551a56..4b8c8397a94 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat) set_bit(feat, self->adds_features); } +void perf_header__clear_feat(struct perf_header *self, int feat) +{ + clear_bit(feat, self->adds_features); +} + bool perf_header__has_feat(const struct perf_header *self, int feat) { return test_bit(feat, self->adds_features); @@ -433,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd) int idx = 0, err; session = container_of(self, struct perf_session, header); - if (perf_session__read_build_ids(session, true)) - perf_header__set_feat(self, HEADER_BUILD_ID); + + if (perf_header__has_feat(self, HEADER_BUILD_ID && + !perf_session__read_build_ids(session, true))) + perf_header__clear_feat(self, HEADER_BUILD_ID); nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); if (!nr_sections) @@ -941,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header) return type; } +bool perf_header__sample_id_all(const struct perf_header *header) +{ + bool value = false, first = true; + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + + if (first) { + value = attr->attr.sample_id_all; + first = false; + } else if (value != attr->attr.sample_id_all) + die("non matching sample_id_all"); + } + + return value; +} + struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header) { @@ -987,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, ev = malloc(size); + if (ev == NULL) + return -ENOMEM; + ev->attr.attr = *attr; memcpy(ev->attr.id, id, ids * sizeof(u64)); ev->attr.header.type = PERF_RECORD_HEADER_ATTR; ev->attr.header.size = size; - err = process(ev, session); + err = process(ev, NULL, session); free(ev); return err; } -int event__synthesize_attrs(struct perf_header *self, - event__handler_t process, +int event__synthesize_attrs(struct perf_header *self, event__handler_t process, struct perf_session *session) { struct perf_header_attr *attr; @@ -1071,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name, ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size); - err = process(&ev, session); + err = process(&ev, NULL, session); return err; } @@ -1126,7 +1153,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs, ev.tracing_data.header.size = sizeof(ev.tracing_data); ev.tracing_data.size = aligned_size; - process(&ev, session); + process(&ev, NULL, session); err = read_tracing_data(fd, pattrs, nb_events); write_padded(fd, NULL, 0, padding); @@ -1186,7 +1213,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc, ev.build_id.header.size = sizeof(ev.build_id) + len; memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); - err = process(&ev, session); + err = process(&ev, NULL, session); return err; } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 402ac2454cf..6335965e1f9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self); int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); +bool perf_header__sample_id_all(const struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); void perf_header__set_feat(struct perf_header *self, int feat); +void perf_header__clear_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); int perf_header__process_sections(struct perf_header *self, int fd, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2022e874099..d5036700a43 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head, FILE *file; int err = 0; u64 len; + char symfs_filename[PATH_MAX]; + + if (filename) { + snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", + symbol_conf.symfs, filename); + } if (filename == NULL) { if (dso->has_build_id) { @@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head, return -ENOMEM; } goto fallback; - } else if (readlink(filename, command, sizeof(command)) < 0 || + } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || strstr(command, "[kernel.kallsyms]") || - access(filename, R_OK)) { + access(symfs_filename, R_OK)) { free(filename); fallback: /* @@ -1111,6 +1117,8 @@ fallback: * DSO is the same as when 'perf record' ran. */ filename = dso->long_name; + snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", + symbol_conf.symfs, filename); free_filename = false; } @@ -1137,7 +1145,7 @@ fallback: "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - filename, filename); + symfs_filename, filename); pr_debug("Executing: %s\n", command); @@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) size_t ret = 0; for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - if (!event__name[i]) + const char *name = event__get_event_name(i); + + if (!strcmp(name, "UNKNOWN")) continue; - ret += fprintf(fp, "%10s events: %10d\n", - event__name[i], self->stats.nr_events[i]); + + ret += fprintf(fp, "%16s events: %10d\n", name, + self->stats.nr_events[i]); } return ret; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 587d375d343..ee789856a8c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -52,8 +52,10 @@ struct sym_priv { struct events_stats { u64 total_period; u64 total_lost; + u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_unknown_events; + u32 nr_invalid_chains; }; enum hist_column { diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h new file mode 100644 index 00000000000..acffd5e4d1d --- /dev/null +++ b/tools/perf/util/include/asm/cpufeature.h @@ -0,0 +1,9 @@ + +#ifndef PERF_CPUFEATURE_H +#define PERF_CPUFEATURE_H + +/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ + +#define X86_FEATURE_REP_GOOD 0 + +#endif /* PERF_CPUFEATURE_H */ diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h new file mode 100644 index 00000000000..bb4198e7837 --- /dev/null +++ b/tools/perf/util/include/asm/dwarf2.h @@ -0,0 +1,11 @@ + +#ifndef PERF_DWARF2_H +#define PERF_DWARF2_H + +/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ + +#define CFI_STARTPROC +#define CFI_ENDPROC + +#endif /* PERF_DWARF2_H */ + diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index bb4ac2e0538..8be0b968ca0 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr) addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); } +static inline void clear_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG)); +} + static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h new file mode 100644 index 00000000000..06387cffe12 --- /dev/null +++ b/tools/perf/util/include/linux/linkage.h @@ -0,0 +1,13 @@ + +#ifndef PERF_LINUX_LINKAGE_H_ +#define PERF_LINUX_LINKAGE_H_ + +/* linkage.h ... for including arch/x86/lib/memcpy_64.S */ + +#define ENTRY(name) \ + .globl name; \ + name: + +#define ENDPROC(name) + +#endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4af5bd59cfd..c305305a388 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name, id = atoll(id_buf); attr->config = id; attr->type = PERF_TYPE_TRACEPOINT; - *strp = evt_name + evt_length; + *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */ attr->sample_type |= PERF_SAMPLE_RAW; attr->sample_type |= PERF_SAMPLE_TIME; @@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp, struct perf_event_attr *attr) { const char *evt_name; - char *flags; + char *flags = NULL, *comma_loc; char sys_name[MAX_EVENT_LENGTH]; unsigned int sys_length, evt_length; @@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp, sys_name[sys_length] = '\0'; evt_name = evt_name + 1; + comma_loc = strchr(evt_name, ','); + if (comma_loc) { + /* take the event name up to the comma */ + evt_name = strndup(evt_name, comma_loc - evt_name); + } flags = strchr(evt_name, ':'); if (flags) { /* split it out: */ @@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp, evt_length = strlen(evt_name); if (evt_length >= MAX_EVENT_LENGTH) return EVT_FAILED; - if (strpbrk(evt_name, "*?")) { - *strp = evt_name + evt_length; + *strp += strlen(sys_name) + evt_length; return parse_multiple_tracepoint_event(sys_name, evt_name, flags); } else diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index c7d72dce54b..abc31a1dac1 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -119,6 +119,10 @@ struct option { { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } +#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\ + .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ + .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG} /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 61191c6cbe7..128aaab0aed 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -95,7 +95,7 @@ static int init_vmlinux(void) goto out; if (machine__create_kernel_maps(&machine) < 0) { - pr_debug("machine__create_kernel_maps "); + pr_debug("machine__create_kernel_maps() failed.\n"); goto out; } out: @@ -149,7 +149,8 @@ static int open_vmlinux(const char *module) { const char *path = kernel_get_module_path(module); if (!path) { - pr_err("Failed to find path of %s module", module ?: "kernel"); + pr_err("Failed to find path of %s module.\n", + module ?: "kernel"); return -ENOENT; } pr_debug("Try to open %s\n", path); @@ -226,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, pr_warning("Warning: No dwarf info found in the vmlinux - " "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); if (!need_dwarf) { - pr_debug("Trying to use symbols.\nn"); + pr_debug("Trying to use symbols.\n"); return 0; } } @@ -295,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir, #define LINEBUF_SIZE 256 #define NR_ADDITIONAL_LINES 2 -static int show_one_line(FILE *fp, int l, bool skip, bool show_num) +static int __show_one_line(FILE *fp, int l, bool skip, bool show_num) { char buf[LINEBUF_SIZE]; - const char *color = PERF_COLOR_BLUE; + const char *color = show_num ? "" : PERF_COLOR_BLUE; + const char *prefix = NULL; - if (fgets(buf, LINEBUF_SIZE, fp) == NULL) - goto error; - if (!skip) { - if (show_num) - fprintf(stdout, "%7d %s", l, buf); - else - color_fprintf(stdout, color, " %s", buf); - } - - while (strlen(buf) == LINEBUF_SIZE - 1 && - buf[LINEBUF_SIZE - 2] != '\n') { + do { if (fgets(buf, LINEBUF_SIZE, fp) == NULL) goto error; - if (!skip) { - if (show_num) - fprintf(stdout, "%s", buf); - else - color_fprintf(stdout, color, "%s", buf); + if (skip) + continue; + if (!prefix) { + prefix = show_num ? "%7d " : " "; + color_fprintf(stdout, color, prefix, l); } - } + color_fprintf(stdout, color, "%s", buf); - return 0; + } while (strchr(buf, '\n') == NULL); + + return 1; error: - if (feof(fp)) - pr_warning("Source file is shorter than expected.\n"); - else + if (ferror(fp)) { pr_warning("File read error: %s\n", strerror(errno)); + return -1; + } + return 0; +} - return -1; +static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) +{ + int rv = __show_one_line(fp, l, skip, show_num); + if (rv == 0) { + pr_warning("Source file is shorter than expected.\n"); + rv = -1; + } + return rv; } +#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) +#define show_one_line(f,l) _show_one_line(f,l,false,false) +#define skip_one_line(f,l) _show_one_line(f,l,true,false) +#define show_one_line_or_eof(f,l) __show_one_line(f,l,false,false) + /* * Show line-range always requires debuginfo to find source file and * line number. @@ -379,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module) fprintf(stdout, "<%s:%d>\n", lr->function, lr->start - lr->offset); else - fprintf(stdout, "<%s:%d>\n", lr->file, lr->start); + fprintf(stdout, "<%s:%d>\n", lr->path, lr->start); fp = fopen(lr->path, "r"); if (fp == NULL) { @@ -388,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module) return -errno; } /* Skip to starting line number */ - while (l < lr->start && ret >= 0) - ret = show_one_line(fp, l++, true, false); - if (ret < 0) - goto end; + while (l < lr->start) { + ret = skip_one_line(fp, l++); + if (ret < 0) + goto end; + } list_for_each_entry(ln, &lr->line_list, list) { - while (ln->line > l && ret >= 0) - ret = show_one_line(fp, (l++) - lr->offset, - false, false); - if (ret >= 0) - ret = show_one_line(fp, (l++) - lr->offset, - false, true); + for (; ln->line > l; l++) { + ret = show_one_line(fp, l - lr->offset); + if (ret < 0) + goto end; + } + ret = show_one_line_with_num(fp, l++ - lr->offset); if (ret < 0) goto end; } if (lr->end == INT_MAX) lr->end = l + NR_ADDITIONAL_LINES; - while (l <= lr->end && !feof(fp) && ret >= 0) - ret = show_one_line(fp, (l++) - lr->offset, false, false); + while (l <= lr->end) { + ret = show_one_line_or_eof(fp, l++ - lr->offset); + if (ret <= 0) + break; + } end: fclose(fp); return ret; @@ -466,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, fd = open_vmlinux(module); if (fd < 0) { - pr_warning("Failed to open debuginfo file.\n"); + pr_warning("Failed to open debug information file.\n"); return fd; } @@ -526,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused, } #endif +static int parse_line_num(char **ptr, int *val, const char *what) +{ + const char *start = *ptr; + + errno = 0; + *val = strtol(*ptr, ptr, 0); + if (errno || *ptr == start) { + semantic_error("'%s' is not a valid number.\n", what); + return -EINVAL; + } + return 0; +} + +/* + * Stuff 'lr' according to the line range described by 'arg'. + * The line range syntax is described by: + * + * SRC[:SLN[+NUM|-ELN]] + * FNC[:SLN[+NUM|-ELN]] + */ int parse_line_range_desc(const char *arg, struct line_range *lr) { - const char *ptr; - char *tmp; - /* - * <Syntax> - * SRC:SLN[+NUM|-ELN] - * FUNC[:SLN[+NUM|-ELN]] - */ - ptr = strchr(arg, ':'); - if (ptr) { - lr->start = (int)strtoul(ptr + 1, &tmp, 0); - if (*tmp == '+') { - lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0); - lr->end--; /* - * Adjust the number of lines here. - * If the number of lines == 1, the - * the end of line should be equal to - * the start of line. - */ - } else if (*tmp == '-') - lr->end = (int)strtoul(tmp + 1, &tmp, 0); - else - lr->end = INT_MAX; + char *range, *name = strdup(arg); + int err; + + if (!name) + return -ENOMEM; + + lr->start = 0; + lr->end = INT_MAX; + + range = strchr(name, ':'); + if (range) { + *range++ = '\0'; + + err = parse_line_num(&range, &lr->start, "start line"); + if (err) + goto err; + + if (*range == '+' || *range == '-') { + const char c = *range++; + + err = parse_line_num(&range, &lr->end, "end line"); + if (err) + goto err; + + if (c == '+') { + lr->end += lr->start; + /* + * Adjust the number of lines here. + * If the number of lines == 1, the + * the end of line should be equal to + * the start of line. + */ + lr->end--; + } + } + pr_debug("Line range is %d to %d\n", lr->start, lr->end); + + err = -EINVAL; if (lr->start > lr->end) { semantic_error("Start line must be smaller" " than end line.\n"); - return -EINVAL; + goto err; } - if (*tmp != '\0') { - semantic_error("Tailing with invalid character '%d'.\n", - *tmp); - return -EINVAL; + if (*range != '\0') { + semantic_error("Tailing with invalid str '%s'.\n", range); + goto err; } - tmp = strndup(arg, (ptr - arg)); - } else { - tmp = strdup(arg); - lr->end = INT_MAX; } - if (tmp == NULL) - return -ENOMEM; - - if (strchr(tmp, '.')) - lr->file = tmp; + if (strchr(name, '.')) + lr->file = name; else - lr->function = tmp; + lr->function = name; return 0; +err: + free(name); + return err; } /* Check the name is good for event/group */ @@ -699,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) /* Exclusion check */ if (pp->lazy_line && pp->line) { - semantic_error("Lazy pattern can't be used with line number."); + semantic_error("Lazy pattern can't be used with" + " line number.\n"); return -EINVAL; } if (pp->lazy_line && pp->offset) { - semantic_error("Lazy pattern can't be used with offset."); + semantic_error("Lazy pattern can't be used with offset.\n"); return -EINVAL; } if (pp->line && pp->offset) { - semantic_error("Offset can't be used with line number."); + semantic_error("Offset can't be used with line number.\n"); return -EINVAL; } if (!pp->line && !pp->lazy_line && pp->file && !pp->function) { semantic_error("File always requires line number or " - "lazy pattern."); + "lazy pattern.\n"); return -EINVAL; } if (pp->offset && !pp->function) { - semantic_error("Offset requires an entry function."); + semantic_error("Offset requires an entry function.\n"); return -EINVAL; } if (pp->retprobe && !pp->function) { - semantic_error("Return probe requires an entry function."); + semantic_error("Return probe requires an entry function.\n"); return -EINVAL; } if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { semantic_error("Offset/Line/Lazy pattern can't be used with " - "return probe."); + "return probe.\n"); return -EINVAL; } @@ -1005,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len) return tmp - buf; error: - pr_debug("Failed to synthesize perf probe argument: %s", + pr_debug("Failed to synthesize perf probe argument: %s\n", strerror(-ret)); return ret; } @@ -1033,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) goto error; } if (pp->file) { - len = strlen(pp->file) - 31; - if (len < 0) - len = 0; - tmp = strchr(pp->file + len, '/'); - if (!tmp) - tmp = pp->file + len; - ret = e_snprintf(file, 32, "@%s", tmp + 1); + tmp = pp->file; + len = strlen(tmp); + if (len > 30) { + tmp = strchr(pp->file + len - 30, '/'); + tmp = tmp ? tmp + 1 : pp->file + len - 30; + } + ret = e_snprintf(file, 32, "@%s", tmp); if (ret <= 0) goto error; } @@ -1055,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp) return buf; error: - pr_debug("Failed to synthesize perf probe point: %s", + pr_debug("Failed to synthesize perf probe point: %s\n", strerror(-ret)); if (buf) free(buf); @@ -1796,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group, ret = e_snprintf(buf, 128, "%s:%s", group, event); if (ret < 0) { - pr_err("Failed to copy event."); + pr_err("Failed to copy event.\n"); return ret; } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index ddf4d455632..ab83b6ac5d6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -652,8 +652,8 @@ static_var: regs = get_arch_regstr(regn); if (!regs) { /* This should be a bug in DWARF or this tool */ - pr_warning("Mapping for DWARF register number %u " - "missing on this architecture.", regn); + pr_warning("Mapping for the register number %u " + "missing on this architecture.\n", regn); return -ERANGE; } @@ -699,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die, if (ret != DW_TAG_pointer_type && ret != DW_TAG_array_type) { pr_warning("Failed to cast into string: " - "%s(%s) is not a pointer nor array.", + "%s(%s) is not a pointer nor array.\n", dwarf_diename(vr_die), dwarf_diename(&type)); return -EINVAL; } if (ret == DW_TAG_pointer_type) { if (die_get_real_type(&type, &type) == NULL) { - pr_warning("Failed to get a type information."); + pr_warning("Failed to get a type" + " information.\n"); return -ENOENT; } while (*ref_ptr) @@ -720,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, if (!die_compare_name(&type, "char") && !die_compare_name(&type, "unsigned char")) { pr_warning("Failed to cast into string: " - "%s is not (unsigned) char *.", + "%s is not (unsigned) char *.\n", dwarf_diename(vr_die)); return -EINVAL; } @@ -830,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -EINVAL; } if (field->name[0] == '[') { - pr_err("Semantic error: %s is not a pointor nor array.", - varname); + pr_err("Semantic error: %s is not a pointor" + " nor array.\n", varname); return -EINVAL; } if (field->ref) { @@ -978,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, name = dwarf_diename(sp_die); if (name) { if (dwarf_entrypc(sp_die, &eaddr) != 0) { - pr_warning("Failed to get entry pc of %s\n", + pr_warning("Failed to get entry address of %s\n", dwarf_diename(sp_die)); return -ENOENT; } @@ -994,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, if (retprobe) { if (eaddr != paddr) { pr_warning("Return probe must be on the head of" - " a real function\n"); + " a real function.\n"); return -EINVAL; } tp->retprobe = true; @@ -1033,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) Dwarf_Frame *frame; if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 || dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) { - pr_warning("Failed to get CFA on 0x%jx\n", + pr_warning("Failed to get call frame on 0x%jx\n", (uintmax_t)pf->addr); return -ENOENT; } @@ -1060,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf) int ret = 0; if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found in this CU.\n"); + pr_warning("No source lines found.\n"); return -ENOENT; } @@ -1162,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) } if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found in this CU.\n"); + pr_warning("No source lines found.\n"); return -ENOENT; } @@ -1220,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) else { /* Get probe address */ if (dwarf_entrypc(in_die, &addr) != 0) { - pr_warning("Failed to get entry pc of %s.\n", + pr_warning("Failed to get entry address of %s.\n", dwarf_diename(in_die)); param->retval = -ENOENT; return DWARF_CB_ABORT; @@ -1261,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) param->retval = find_probe_point_lazy(sp_die, pf); else { if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry pc of %s.\n", - dwarf_diename(sp_die)); + pr_warning("Failed to get entry address of " + "%s.\n", dwarf_diename(sp_die)); param->retval = -ENOENT; return DWARF_CB_ABORT; } @@ -1304,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf) dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); if (!dbg) { - pr_warning("No dwarf info found in the vmlinux - " + pr_warning("No debug information found in the vmlinux - " "please rebuild with CONFIG_DEBUG_INFO=y.\n"); return -EBADF; } @@ -1549,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt) /* Open the live linux kernel */ dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias); if (!dbg) { - pr_warning("No dwarf info found in the vmlinux - " + pr_warning("No debug information found in the vmlinux - " "please rebuild with CONFIG_DEBUG_INFO=y.\n"); ret = -EINVAL; goto end; @@ -1559,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt) addr += bias; /* Find cu die */ if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { - pr_warning("No CU DIE is found at %lx\n", addr); + pr_warning("Failed to find debug information for address %lx\n", + addr); ret = -EINVAL; goto end; } @@ -1684,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) line_list__init(&lf->lr->line_list); if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found in this CU.\n"); + pr_warning("No source lines found.\n"); return -ENOENT; } @@ -1809,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr) dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); if (!dbg) { - pr_warning("No dwarf info found in the vmlinux - " + pr_warning("No debug information found in the vmlinux - " "please rebuild with CONFIG_DEBUG_INFO=y.\n"); return -EBADF; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index bba69d45569..beaefc3c122 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev, bool externs); #include <dwarf.h> -#include <libdw.h> -#include <libdwfl.h> -#include <version.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <elfutils/version.h> struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b059dc50cc2..93680818e24 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -1,5 +1,5 @@ /* - * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. + * trace-event-perl. Feed perf script events to an embedded Perl interpreter. * * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> * @@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile) return -1; } - fprintf(ofp, "# perf trace event handlers, " - "generated by perf trace -g perl\n"); + fprintf(ofp, "# perf script event handlers, " + "generated by perf script -g perl\n"); fprintf(ofp, "# Licensed under the terms of the GNU GPL" " License version 2\n\n"); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 33a63252374..c6d99334bdf 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile) fprintf(stderr, "couldn't open %s\n", fname); return -1; } - fprintf(ofp, "# perf trace event handlers, " - "generated by perf trace -g python\n"); + fprintf(ofp, "# perf script event handlers, " + "generated by perf script -g python\n"); fprintf(ofp, "# Licensed under the terms of the GNU GPL" " License version 2\n\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fa9d652c2dc..0f7e544544f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -65,9 +65,49 @@ out_close: return -1; } +static void perf_session__id_header_size(struct perf_session *session) +{ + struct sample_data *data; + u64 sample_type = session->sample_type; + u16 size = 0; + + if (!session->sample_id_all) + goto out; + + if (sample_type & PERF_SAMPLE_TID) + size += sizeof(data->tid) * 2; + + if (sample_type & PERF_SAMPLE_TIME) + size += sizeof(data->time); + + if (sample_type & PERF_SAMPLE_ID) + size += sizeof(data->id); + + if (sample_type & PERF_SAMPLE_STREAM_ID) + size += sizeof(data->stream_id); + + if (sample_type & PERF_SAMPLE_CPU) + size += sizeof(data->cpu) * 2; +out: + session->id_hdr_size = size; +} + +void perf_session__set_sample_id_all(struct perf_session *session, bool value) +{ + session->sample_id_all = value; + perf_session__id_header_size(session); +} + +void perf_session__set_sample_type(struct perf_session *session, u64 type) +{ + session->sample_type = type; +} + void perf_session__update_sample_type(struct perf_session *self) { self->sample_type = perf_header__sample_type(&self->header); + self->sample_id_all = perf_header__sample_id_all(&self->header); + perf_session__id_header_size(self); } int perf_session__create_kernel_maps(struct perf_session *self) @@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self) machines__destroy_guest_kernel_maps(&self->machines); } -struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe) +struct perf_session *perf_session__new(const char *filename, int mode, + bool force, bool repipe, + struct perf_event_ops *ops) { size_t len = filename ? strlen(filename) + 1 : 0; struct perf_session *self = zalloc(sizeof(*self) + len); @@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc INIT_LIST_HEAD(&self->dead_threads); self->hists_tree = RB_ROOT; self->last_match = NULL; - self->mmap_window = 32; + /* + * On 64bit we can mmap the data file in one go. No need for tiny mmap + * slices. On 32bit we use 32MB. + */ +#if BITS_PER_LONG == 64 + self->mmap_window = ULLONG_MAX; +#else + self->mmap_window = 32 * 1024 * 1024ULL; +#endif self->machines = RB_ROOT; self->repipe = repipe; - INIT_LIST_HEAD(&self->ordered_samples.samples_head); + INIT_LIST_HEAD(&self->ordered_samples.samples); + INIT_LIST_HEAD(&self->ordered_samples.sample_cache); + INIT_LIST_HEAD(&self->ordered_samples.to_free); machine__init(&self->host_machine, "", HOST_KERNEL_ID); if (mode == O_RDONLY) { @@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc } perf_session__update_sample_type(self); + + if (ops && ops->ordering_requires_timestamps && + ops->ordered_samples && !self->sample_id_all) { + dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); + ops->ordered_samples = false; + } + out: return self; out_free: @@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, return syms; } +static int process_event_synth_stub(event_t *event __used, + struct perf_session *session __used) +{ + dump_printf(": unhandled!\n"); + return 0; +} + static int process_event_stub(event_t *event __used, + struct sample_data *sample __used, struct perf_session *session __used) { dump_printf(": unhandled!\n"); @@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) if (handler->exit == NULL) handler->exit = process_event_stub; if (handler->lost == NULL) - handler->lost = process_event_stub; + handler->lost = event__process_lost; if (handler->read == NULL) handler->read = process_event_stub; if (handler->throttle == NULL) @@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) if (handler->unthrottle == NULL) handler->unthrottle = process_event_stub; if (handler->attr == NULL) - handler->attr = process_event_stub; + handler->attr = process_event_synth_stub; if (handler->event_type == NULL) - handler->event_type = process_event_stub; + handler->event_type = process_event_synth_stub; if (handler->tracing_data == NULL) - handler->tracing_data = process_event_stub; + handler->tracing_data = process_event_synth_stub; if (handler->build_id == NULL) - handler->build_id = process_event_stub; + handler->build_id = process_event_synth_stub; if (handler->finished_round == NULL) { if (handler->ordered_samples) handler->finished_round = process_finished_round; @@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = { struct sample_queue { u64 timestamp; - struct sample_event *event; + u64 file_offset; + event_t *event; struct list_head list; }; +static void perf_session_free_sample_buffers(struct perf_session *session) +{ + struct ordered_samples *os = &session->ordered_samples; + + while (!list_empty(&os->to_free)) { + struct sample_queue *sq; + + sq = list_entry(os->to_free.next, struct sample_queue, list); + list_del(&sq->list); + free(sq); + } +} + +static int perf_session_deliver_event(struct perf_session *session, + event_t *event, + struct sample_data *sample, + struct perf_event_ops *ops, + u64 file_offset); + static void flush_sample_queue(struct perf_session *s, struct perf_event_ops *ops) { - struct list_head *head = &s->ordered_samples.samples_head; - u64 limit = s->ordered_samples.next_flush; + struct ordered_samples *os = &s->ordered_samples; + struct list_head *head = &os->samples; struct sample_queue *tmp, *iter; + struct sample_data sample; + u64 limit = os->next_flush; + u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; if (!ops->ordered_samples || !limit) return; list_for_each_entry_safe(iter, tmp, head, list) { if (iter->timestamp > limit) - return; - - if (iter == s->ordered_samples.last_inserted) - s->ordered_samples.last_inserted = NULL; + break; - ops->sample((event_t *)iter->event, s); + event__parse_sample(iter->event, s, &sample); + perf_session_deliver_event(s, iter->event, &sample, ops, + iter->file_offset); - s->ordered_samples.last_flush = iter->timestamp; + os->last_flush = iter->timestamp; list_del(&iter->list); - free(iter->event); - free(iter); + list_add(&iter->list, &os->sample_cache); + } + + if (list_empty(head)) { + os->last_sample = NULL; + } else if (last_ts <= limit) { + os->last_sample = + list_entry(head->prev, struct sample_queue, list); } } @@ -465,176 +560,263 @@ static int process_finished_round(event_t *event __used, return 0; } -static void __queue_sample_end(struct sample_queue *new, struct list_head *head) -{ - struct sample_queue *iter; - - list_for_each_entry_reverse(iter, head, list) { - if (iter->timestamp < new->timestamp) { - list_add(&new->list, &iter->list); - return; - } - } - - list_add(&new->list, head); -} - -static void __queue_sample_before(struct sample_queue *new, - struct sample_queue *iter, - struct list_head *head) -{ - list_for_each_entry_continue_reverse(iter, head, list) { - if (iter->timestamp < new->timestamp) { - list_add(&new->list, &iter->list); - return; - } - } - - list_add(&new->list, head); -} - -static void __queue_sample_after(struct sample_queue *new, - struct sample_queue *iter, - struct list_head *head) -{ - list_for_each_entry_continue(iter, head, list) { - if (iter->timestamp > new->timestamp) { - list_add_tail(&new->list, &iter->list); - return; - } - } - list_add_tail(&new->list, head); -} - /* The queue is ordered by time */ -static void __queue_sample_event(struct sample_queue *new, - struct perf_session *s) +static void __queue_event(struct sample_queue *new, struct perf_session *s) { - struct sample_queue *last_inserted = s->ordered_samples.last_inserted; - struct list_head *head = &s->ordered_samples.samples_head; + struct ordered_samples *os = &s->ordered_samples; + struct sample_queue *sample = os->last_sample; + u64 timestamp = new->timestamp; + struct list_head *p; + os->last_sample = new; - if (!last_inserted) { - __queue_sample_end(new, head); + if (!sample) { + list_add(&new->list, &os->samples); + os->max_timestamp = timestamp; return; } /* - * Most of the time the current event has a timestamp - * very close to the last event inserted, unless we just switched - * to another event buffer. Having a sorting based on a list and - * on the last inserted event that is close to the current one is - * probably more efficient than an rbtree based sorting. + * last_sample might point to some random place in the list as it's + * the last queued event. We expect that the new event is close to + * this. */ - if (last_inserted->timestamp >= new->timestamp) - __queue_sample_before(new, last_inserted, head); - else - __queue_sample_after(new, last_inserted, head); + if (sample->timestamp <= timestamp) { + while (sample->timestamp <= timestamp) { + p = sample->list.next; + if (p == &os->samples) { + list_add_tail(&new->list, &os->samples); + os->max_timestamp = timestamp; + return; + } + sample = list_entry(p, struct sample_queue, list); + } + list_add_tail(&new->list, &sample->list); + } else { + while (sample->timestamp > timestamp) { + p = sample->list.prev; + if (p == &os->samples) { + list_add(&new->list, &os->samples); + return; + } + sample = list_entry(p, struct sample_queue, list); + } + list_add(&new->list, &sample->list); + } } -static int queue_sample_event(event_t *event, struct sample_data *data, - struct perf_session *s) +#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) + +static int perf_session_queue_event(struct perf_session *s, event_t *event, + struct sample_data *data, u64 file_offset) { + struct ordered_samples *os = &s->ordered_samples; + struct list_head *sc = &os->sample_cache; u64 timestamp = data->time; struct sample_queue *new; + if (!timestamp || timestamp == ~0ULL) + return -ETIME; if (timestamp < s->ordered_samples.last_flush) { printf("Warning: Timestamp below last timeslice flush\n"); return -EINVAL; } - new = malloc(sizeof(*new)); - if (!new) - return -ENOMEM; + if (!list_empty(sc)) { + new = list_entry(sc->next, struct sample_queue, list); + list_del(&new->list); + } else if (os->sample_buffer) { + new = os->sample_buffer + os->sample_buffer_idx; + if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER) + os->sample_buffer = NULL; + } else { + os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new)); + if (!os->sample_buffer) + return -ENOMEM; + list_add(&os->sample_buffer->list, &os->to_free); + os->sample_buffer_idx = 2; + new = os->sample_buffer + 1; + } new->timestamp = timestamp; + new->file_offset = file_offset; + new->event = event; - new->event = malloc(event->header.size); - if (!new->event) { - free(new); - return -ENOMEM; - } + __queue_event(new, s); - memcpy(new->event, event, event->header.size); + return 0; +} - __queue_sample_event(new, s); - s->ordered_samples.last_inserted = new; +static void callchain__printf(struct sample_data *sample) +{ + unsigned int i; - if (new->timestamp > s->ordered_samples.max_timestamp) - s->ordered_samples.max_timestamp = new->timestamp; + printf("... chain: nr:%Lu\n", sample->callchain->nr); - return 0; + for (i = 0; i < sample->callchain->nr; i++) + printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]); } -static int perf_session__process_sample(event_t *event, struct perf_session *s, - struct perf_event_ops *ops) +static void perf_session__print_tstamp(struct perf_session *session, + event_t *event, + struct sample_data *sample) { - struct sample_data data; + if (event->header.type != PERF_RECORD_SAMPLE && + !session->sample_id_all) { + fputs("-1 -1 ", stdout); + return; + } - if (!ops->ordered_samples) - return ops->sample(event, s); + if ((session->sample_type & PERF_SAMPLE_CPU)) + printf("%u ", sample->cpu); - bzero(&data, sizeof(struct sample_data)); - event__parse_sample(event, s->sample_type, &data); + if (session->sample_type & PERF_SAMPLE_TIME) + printf("%Lu ", sample->time); +} - queue_sample_event(event, &data, s); +static void dump_event(struct perf_session *session, event_t *event, + u64 file_offset, struct sample_data *sample) +{ + if (!dump_trace) + return; - return 0; + printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size, + event->header.type); + + trace_event(event); + + if (sample) + perf_session__print_tstamp(session, event, sample); + + printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size, + event__get_event_name(event->header.type)); } -static int perf_session__process_event(struct perf_session *self, - event_t *event, - struct perf_event_ops *ops, - u64 offset, u64 head) +static void dump_sample(struct perf_session *session, event_t *event, + struct sample_data *sample) { - trace_event(event); + if (!dump_trace) + return; - if (event->header.type < PERF_RECORD_HEADER_MAX) { - dump_printf("%#Lx [%#x]: PERF_RECORD_%s", - offset + head, event->header.size, - event__name[event->header.type]); - hists__inc_nr_events(&self->hists, event->header.type); - } + printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc, + sample->pid, sample->tid, sample->ip, sample->period); - if (self->header.needs_swap && event__swap_ops[event->header.type]) - event__swap_ops[event->header.type](event); + if (session->sample_type & PERF_SAMPLE_CALLCHAIN) + callchain__printf(sample); +} + +static int perf_session_deliver_event(struct perf_session *session, + event_t *event, + struct sample_data *sample, + struct perf_event_ops *ops, + u64 file_offset) +{ + dump_event(session, event, file_offset, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: - return perf_session__process_sample(event, self, ops); + dump_sample(session, event, sample); + return ops->sample(event, sample, session); case PERF_RECORD_MMAP: - return ops->mmap(event, self); + return ops->mmap(event, sample, session); case PERF_RECORD_COMM: - return ops->comm(event, self); + return ops->comm(event, sample, session); case PERF_RECORD_FORK: - return ops->fork(event, self); + return ops->fork(event, sample, session); case PERF_RECORD_EXIT: - return ops->exit(event, self); + return ops->exit(event, sample, session); case PERF_RECORD_LOST: - return ops->lost(event, self); + return ops->lost(event, sample, session); case PERF_RECORD_READ: - return ops->read(event, self); + return ops->read(event, sample, session); case PERF_RECORD_THROTTLE: - return ops->throttle(event, self); + return ops->throttle(event, sample, session); case PERF_RECORD_UNTHROTTLE: - return ops->unthrottle(event, self); + return ops->unthrottle(event, sample, session); + default: + ++session->hists.stats.nr_unknown_events; + return -1; + } +} + +static int perf_session__preprocess_sample(struct perf_session *session, + event_t *event, struct sample_data *sample) +{ + if (event->header.type != PERF_RECORD_SAMPLE || + !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) + return 0; + + if (!ip_callchain__valid(sample->callchain, event)) { + pr_debug("call-chain problem with event, skipping it.\n"); + ++session->hists.stats.nr_invalid_chains; + session->hists.stats.total_invalid_chains += sample->period; + return -EINVAL; + } + return 0; +} + +static int perf_session__process_user_event(struct perf_session *session, event_t *event, + struct perf_event_ops *ops, u64 file_offset) +{ + dump_event(session, event, file_offset, NULL); + + /* These events are processed right away */ + switch (event->header.type) { case PERF_RECORD_HEADER_ATTR: - return ops->attr(event, self); + return ops->attr(event, session); case PERF_RECORD_HEADER_EVENT_TYPE: - return ops->event_type(event, self); + return ops->event_type(event, session); case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ - lseek(self->fd, offset + head, SEEK_SET); - return ops->tracing_data(event, self); + lseek(session->fd, file_offset, SEEK_SET); + return ops->tracing_data(event, session); case PERF_RECORD_HEADER_BUILD_ID: - return ops->build_id(event, self); + return ops->build_id(event, session); case PERF_RECORD_FINISHED_ROUND: - return ops->finished_round(event, self, ops); + return ops->finished_round(event, session, ops); default: - ++self->hists.stats.nr_unknown_events; - return -1; + return -EINVAL; + } +} + +static int perf_session__process_event(struct perf_session *session, + event_t *event, + struct perf_event_ops *ops, + u64 file_offset) +{ + struct sample_data sample; + int ret; + + if (session->header.needs_swap && event__swap_ops[event->header.type]) + event__swap_ops[event->header.type](event); + + if (event->header.type >= PERF_RECORD_HEADER_MAX) + return -EINVAL; + + hists__inc_nr_events(&session->hists, event->header.type); + + if (event->header.type >= PERF_RECORD_USER_TYPE_START) + return perf_session__process_user_event(session, event, ops, file_offset); + + /* + * For all kernel events we get the sample data + */ + event__parse_sample(event, session, &sample); + + /* Preprocess sample records - precheck callchains */ + if (perf_session__preprocess_sample(session, event, &sample)) + return 0; + + if (ops->ordered_samples) { + ret = perf_session_queue_event(session, event, &sample, + file_offset); + if (ret != -ETIME) + return ret; } + + return perf_session_deliver_event(session, event, &sample, ops, + file_offset); } void perf_event_header__bswap(struct perf_event_header *self) @@ -724,8 +906,7 @@ more: } if (size == 0 || - (skip = perf_session__process_event(self, &event, ops, - 0, head)) < 0) { + (skip = perf_session__process_event(self, &event, ops, head)) < 0) { dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", head, event.header.size, event.header.type); /* @@ -740,9 +921,6 @@ more: head += size; - dump_printf("\n%#Lx [%#x]: event: %d\n", - head, event.header.size, event.header.type); - if (skip > 0) head += skip; @@ -751,82 +929,90 @@ more: done: err = 0; out_err: + perf_session_free_sample_buffers(self); return err; } -int __perf_session__process_events(struct perf_session *self, +int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 file_size, struct perf_event_ops *ops) { - int err, mmap_prot, mmap_flags; - u64 head, shift; - u64 offset = 0; - size_t page_size; + u64 head, page_offset, file_offset, file_pos, progress_next; + int err, mmap_prot, mmap_flags, map_idx = 0; + struct ui_progress *progress; + size_t page_size, mmap_size; + char *buf, *mmaps[8]; event_t *event; uint32_t size; - char *buf; - struct ui_progress *progress = ui_progress__new("Processing events...", - self->size); - if (progress == NULL) - return -1; perf_event_ops__fill_defaults(ops); page_size = sysconf(_SC_PAGESIZE); - head = data_offset; - shift = page_size * (head / page_size); - offset += shift; - head -= shift; + page_offset = page_size * (data_offset / page_size); + file_offset = page_offset; + head = data_offset - page_offset; + + if (data_offset + data_size < file_size) + file_size = data_offset + data_size; + + progress_next = file_size / 16; + progress = ui_progress__new("Processing events...", file_size); + if (progress == NULL) + return -1; + + mmap_size = session->mmap_window; + if (mmap_size > file_size) + mmap_size = file_size; + + memset(mmaps, 0, sizeof(mmaps)); mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; - if (self->header.needs_swap) { + if (session->header.needs_swap) { mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, - mmap_flags, self->fd, offset); + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd, + file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); err = -errno; goto out_err; } + mmaps[map_idx] = buf; + map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); + file_pos = file_offset + head; more: event = (event_t *)(buf + head); - ui_progress__update(progress, offset); - if (self->header.needs_swap) + if (session->header.needs_swap) perf_event_header__bswap(&event->header); size = event->header.size; if (size == 0) size = 8; - if (head + event->header.size >= page_size * self->mmap_window) { - int munmap_ret; - - shift = page_size * (head / page_size); - - munmap_ret = munmap(buf, page_size * self->mmap_window); - assert(munmap_ret == 0); + if (head + event->header.size >= mmap_size) { + if (mmaps[map_idx]) { + munmap(mmaps[map_idx], mmap_size); + mmaps[map_idx] = NULL; + } - offset += shift; - head -= shift; + page_offset = page_size * (head / page_size); + file_offset += page_offset; + head -= page_offset; goto remap; } size = event->header.size; - dump_printf("\n%#Lx [%#x]: event: %d\n", - offset + head, event->header.size, event->header.type); - if (size == 0 || - perf_session__process_event(self, event, ops, offset, head) < 0) { + perf_session__process_event(session, event, ops, file_pos) < 0) { dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", - offset + head, event->header.size, + file_offset + head, event->header.size, event->header.type); /* * assume we lost track of the stream, check alignment, and @@ -839,19 +1025,49 @@ more: } head += size; + file_pos += size; - if (offset + head >= data_offset + data_size) - goto done; + if (file_pos >= progress_next) { + progress_next += file_size / 16; + ui_progress__update(progress, file_pos); + } - if (offset + head < file_size) + if (file_pos < file_size) goto more; -done: + err = 0; /* do the final flush for ordered samples */ - self->ordered_samples.next_flush = ULLONG_MAX; - flush_sample_queue(self, ops); + session->ordered_samples.next_flush = ULLONG_MAX; + flush_sample_queue(session, ops); out_err: ui_progress__delete(progress); + + if (ops->lost == event__process_lost && + session->hists.stats.total_lost != 0) { + ui__warning("Processed %Lu events and LOST %Lu!\n\n" + "Check IO/CPU overload!\n\n", + session->hists.stats.total_period, + session->hists.stats.total_lost); + } + + if (session->hists.stats.nr_unknown_events != 0) { + ui__warning("Found %u unknown events!\n\n" + "Is this an older tool processing a perf.data " + "file generated by a more recent tool?\n\n" + "If that is not the case, consider " + "reporting to linux-kernel@vger.kernel.org.\n\n", + session->hists.stats.nr_unknown_events); + } + + if (session->hists.stats.nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + session->hists.stats.nr_invalid_chains, + session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); + } + + perf_session_free_sample_buffers(session); return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 9fa0fc2a863..ffe4b98db8f 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -17,8 +17,12 @@ struct ordered_samples { u64 last_flush; u64 next_flush; u64 max_timestamp; - struct list_head samples_head; - struct sample_queue *last_inserted; + struct list_head samples; + struct list_head sample_cache; + struct list_head to_free; + struct sample_queue *sample_buffer; + struct sample_queue *last_sample; + int sample_buffer_idx; }; struct perf_session { @@ -42,6 +46,8 @@ struct perf_session { int fd; bool fd_pipe; bool repipe; + bool sample_id_all; + u16 id_hdr_size; int cwdlen; char *cwd; struct ordered_samples ordered_samples; @@ -50,7 +56,9 @@ struct perf_session { struct perf_event_ops; -typedef int (*event_op)(event_t *self, struct perf_session *session); +typedef int (*event_op)(event_t *self, struct sample_data *sample, + struct perf_session *session); +typedef int (*event_synth_op)(event_t *self, struct perf_session *session); typedef int (*event_op2)(event_t *self, struct perf_session *session, struct perf_event_ops *ops); @@ -63,16 +71,19 @@ struct perf_event_ops { lost, read, throttle, - unthrottle, - attr, + unthrottle; + event_synth_op attr, event_type, tracing_data, build_id; event_op2 finished_round; bool ordered_samples; + bool ordering_requires_timestamps; }; -struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe); +struct perf_session *perf_session__new(const char *filename, int mode, + bool force, bool repipe, + struct perf_event_ops *ops); void perf_session__delete(struct perf_session *self); void perf_event_header__bswap(struct perf_event_header *self); @@ -100,6 +111,8 @@ int perf_session__create_kernel_maps(struct perf_session *self); int do_read(int fd, void *buf, size_t size); void perf_session__update_sample_type(struct perf_session *self); +void perf_session__set_sample_id_all(struct perf_session *session, bool value); +void perf_session__set_sample_type(struct perf_session *session, u64 type); void perf_session__remove_thread(struct perf_session *self, struct thread *th); static inline diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index b62a553cc67..f44fa541d56 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%-*s", width, dso_name); } - return repsep_snprintf(bf, size, "%*Lx", width, self->ip); + return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } /* --sort symbol */ @@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, if (verbose) { char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; - ret += repsep_snprintf(bf, size, "%*Lx %c ", + ret += repsep_snprintf(bf, size, "%-#*llx %c ", BITS_PER_LONG / 4, self->ip, o); } @@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, ret += repsep_snprintf(bf + ret, size - ret, "%s", self->ms.sym->name); else - ret += repsep_snprintf(bf + ret, size - ret, "%*Lx", + ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", BITS_PER_LONG / 4, self->ip); return ret; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 439ab947daf..15ccfba8cdf 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -22,6 +22,10 @@ #include <limits.h> #include <sys/utsname.h> +#ifndef KSYM_NAME_LEN +#define KSYM_NAME_LEN 128 +#endif + #ifndef NT_GNU_BUILD_ID #define NT_GNU_BUILD_ID 3 #endif @@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = { .exclude_other = true, .use_modules = true, .try_vmlinux_path = true, + .symfs = "", }; int dso__name_len(const struct dso *self) @@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self) prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start) + if (prev->end == prev->start && prev->end != curr->start) prev->end = curr->start - 1; } @@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type) * We still haven't the actual symbols, so guess the * last map final address. */ - curr->end = ~0UL; + curr->end = ~0ULL; } static void map_groups__fixup_end(struct map_groups *self) @@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start)) + char type, u64 start, u64 end)) { char *line = NULL; size_t n; - int err = 0; + int err = -1; + u64 prev_start = 0; + char prev_symbol_type = 0; + char *prev_symbol_name; FILE *file = fopen(filename, "r"); if (file == NULL) goto out_failure; + prev_symbol_name = malloc(KSYM_NAME_LEN); + if (prev_symbol_name == NULL) + goto out_close; + + err = 0; + while (!feof(file)) { u64 start; int line_len, len; @@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg, continue; symbol_type = toupper(line[len]); - symbol_name = line + len + 2; + len += 2; + symbol_name = line + len; + len = line_len - len; - err = process_symbol(arg, symbol_name, symbol_type, start); - if (err) + if (len >= KSYM_NAME_LEN) { + err = -1; break; + } + + if (prev_symbol_type) { + u64 end = start; + if (end != prev_start) + --end; + err = process_symbol(arg, prev_symbol_name, + prev_symbol_type, prev_start, end); + if (err) + break; + } + + memcpy(prev_symbol_name, symbol_name, len + 1); + prev_symbol_type = symbol_type; + prev_start = start; } + free(prev_symbol_name); free(line); +out_close: fclose(file); return err; @@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type) } static int map__process_kallsym_symbol(void *arg, const char *name, - char type, u64 start) + char type, u64 start, u64 end) { struct symbol *sym; struct process_kallsyms_args *a = arg; @@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__is_a(type, a->map->type)) return 0; - /* - * Will fix up the end later, when we have all symbols sorted. - */ - sym = symbol__new(start, 0, kallsyms2elf_type(type), name); - + sym = symbol__new(start, end - start + 1, + kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename, if (dso__load_all_kallsyms(self, filename, map) < 0) return -1; - symbols__fixup_end(&self->symbols[map->type]); if (self->kernel == DSO_TYPE_GUEST_KERNEL) self->origin = DSO__ORIG_GUEST_KERNEL; else @@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, char sympltname[1024]; Elf *elf; int nr = 0, symidx, fd, err = 0; + char name[PATH_MAX]; - fd = open(self->long_name, O_RDONLY); + snprintf(name, sizeof(name), "%s%s", + symbol_conf.symfs, self->long_name); + fd = open(name, O_RDONLY); if (fd < 0) goto out; @@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) self->origin++) { switch (self->origin) { case DSO__ORIG_BUILD_ID_CACHE: - if (dso__build_id_filename(self, name, size) == NULL) + /* skip the locally configured cache if a symfs is given */ + if (symbol_conf.symfs[0] || + (dso__build_id_filename(self, name, size) == NULL)) { continue; + } break; case DSO__ORIG_FEDORA: - snprintf(name, size, "/usr/lib/debug%s.debug", - self->long_name); + snprintf(name, size, "%s/usr/lib/debug%s.debug", + symbol_conf.symfs, self->long_name); break; case DSO__ORIG_UBUNTU: - snprintf(name, size, "/usr/lib/debug%s", - self->long_name); + snprintf(name, size, "%s/usr/lib/debug%s", + symbol_conf.symfs, self->long_name); break; case DSO__ORIG_BUILDID: { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) sizeof(self->build_id), build_id_hex); snprintf(name, size, - "/usr/lib/debug/.build-id/%.2s/%s.debug", - build_id_hex, build_id_hex + 2); + "%s/usr/lib/debug/.build-id/%.2s/%s.debug", + symbol_conf.symfs, build_id_hex, build_id_hex + 2); } break; case DSO__ORIG_DSO: - snprintf(name, size, "%s", self->long_name); + snprintf(name, size, "%s%s", + symbol_conf.symfs, self->long_name); break; case DSO__ORIG_GUEST_KMODULE: if (map->groups && map->groups->machine) root_dir = map->groups->machine->root_dir; else root_dir = ""; - snprintf(name, size, "%s%s", root_dir, self->long_name); + snprintf(name, size, "%s%s%s", symbol_conf.symfs, + root_dir, self->long_name); + break; + + case DSO__ORIG_KMODULE: + snprintf(name, size, "%s%s", symbol_conf.symfs, + self->long_name); break; default: @@ -1784,17 +1826,20 @@ int dso__load_vmlinux(struct dso *self, struct map *map, const char *vmlinux, symbol_filter_t filter) { int err = -1, fd; + char symfs_vmlinux[PATH_MAX]; - fd = open(vmlinux, O_RDONLY); + snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s", + symbol_conf.symfs, vmlinux); + fd = open(symfs_vmlinux, O_RDONLY); if (fd < 0) return -1; dso__set_loaded(self, map->type); - err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0); + err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0); close(fd); if (err > 0) - pr_debug("Using %s for symbols\n", vmlinux); + pr_debug("Using %s for symbols\n", symfs_vmlinux); return err; } @@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, const char *kallsyms_filename = NULL; char *kallsyms_allocated_filename = NULL; /* - * Step 1: if the user specified a vmlinux filename, use it and only - * it, reporting errors to the user if it cannot be used. + * Step 1: if the user specified a kallsyms or vmlinux filename, use + * it and only it, reporting errors to the user if it cannot be used. * * For instance, try to analyse an ARM perf.data file _without_ a * build-id, or if the user specifies the wrong path to the right @@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, * validation in dso__load_vmlinux and will bail out if they don't * match. */ + if (symbol_conf.kallsyms_name != NULL) { + kallsyms_filename = symbol_conf.kallsyms_name; + goto do_kallsyms; + } + if (symbol_conf.vmlinux_name != NULL) { err = dso__load_vmlinux(self, map, symbol_conf.vmlinux_name, filter); @@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, goto out_fixup; } + /* do not try local files if a symfs was given */ + if (symbol_conf.symfs[0] != 0) + return -1; + /* * Say the kernel DSO was created when processing the build-id header table, * we have a build-id, so check if it is the same as the running kernel, @@ -2136,7 +2190,7 @@ struct process_args { }; static int symbol__in_kernel(void *arg, const char *name, - char type __used, u64 start) + char type __used, u64 start, u64 end __used) { struct process_args *args = arg; @@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void) struct utsname uts; char bf[PATH_MAX]; - if (uname(&uts) < 0) - return -1; - vmlinux_path = malloc(sizeof(char *) * 5); if (vmlinux_path == NULL) return -1; @@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void) if (vmlinux_path[vmlinux_path__nr_entries] == NULL) goto out_fail; ++vmlinux_path__nr_entries; + + /* only try running kernel version if no symfs was given */ + if (symbol_conf.symfs[0] != 0) + return 0; + + if (uname(&uts) < 0) + return -1; + snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); if (vmlinux_path[vmlinux_path__nr_entries] == NULL) @@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str, int symbol__init(void) { + const char *symfs; + if (symbol_conf.initialized) return 0; @@ -2359,6 +2420,18 @@ int symbol__init(void) symbol_conf.sym_list_str, "symbol") < 0) goto out_free_comm_list; + /* + * A path to symbols of "/" is identical to "" + * reset here for simplicity. + */ + symfs = realpath(symbol_conf.symfs, NULL); + if (symfs == NULL) + symfs = symbol_conf.symfs; + if (strcmp(symfs, "/") == 0) + symbol_conf.symfs = ""; + if (symfs != symbol_conf.symfs) + free((void *)symfs); + symbol_conf.initialized = true; return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 6c6eafdb932..670cd1c88f5 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -72,6 +72,7 @@ struct symbol_conf { show_cpu_utilization, initialized; const char *vmlinux_name, + *kallsyms_name, *source_prefix, *field_sep; const char *default_guest_vmlinux_name, @@ -85,6 +86,7 @@ struct symbol_conf { struct strlist *dso_list, *comm_list, *sym_list; + const char *symfs; }; extern struct symbol_conf symbol_conf; @@ -215,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits); int build_id__sprintf(const u8 *self, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start)); + char type, u64 start, u64 end)); void machine__destroy_kernel_maps(struct machine *self); int __machine__create_kernel_maps(struct machine *self, struct dso *kernel); diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 056c69521a3..7b5a8926624 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c @@ -104,10 +104,24 @@ out_destroy_form: return rc; } -static const char yes[] = "Yes", no[] = "No"; +static const char yes[] = "Yes", no[] = "No", + warning_str[] = "Warning!", ok[] = "Ok"; bool ui__dialog_yesno(const char *msg) { /* newtWinChoice should really be accepting const char pointers... */ return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1; } + +void ui__warning(const char *format, ...) +{ + va_list args; + + va_start(args, format); + if (use_browser > 0) + newtWinMessagev((char *)warning_str, (char *)ok, + (char *)format, args); + else + vfprintf(stderr, format, args); + va_end(args); +} |