diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-13 14:39:42 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-13 14:39:42 -0800 |
commit | 9fdfbc2bff587f454dd95e2caa6d147c9abe39e4 (patch) | |
tree | 2feaee47cbcfb57dd0d5cf23509e22011541e717 /tools/perf | |
parent | 8cea4eb642890a1de58980e7e1617d1765ef8f7c (diff) | |
parent | dc1d628a67a8f042e711ea5accc0beedc3ef0092 (diff) |
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
perf: Provide generic perf_sample_data initialization
MAINTAINERS: Add Arnaldo as tools/perf/ co-maintainer
perf trace: Don't use pager if scripting
perf trace/scripting: Remove extraneous header read
perf, ARM: Modify kuser rmb() call to compile for Thumb-2
x86/stacktrace: Don't dereference bad frame pointers
perf archive: Don't try to collect files without a build-id
perf_events, x86: Fixup fixed counter constraints
perf, x86: Restrict the ANY flag
perf, x86: rename macro in ARCH_PERFMON_EVENTSEL_ENABLE
perf, x86: add some IBS macros to perf_event.h
perf, x86: make IBS macros available in perf_event.h
hw-breakpoints: Remove stub unthrottle callback
x86/hw-breakpoints: Remove the name field
perf: Remove pointless breakpoint union
perf lock: Drop the buffers multiplexing dependency
perf lock: Fix and add misc documentally things
percpu: Add __percpu sparse annotations to hw_breakpoint
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/Documentation/perf-lock.txt | 29 | ||||
-rw-r--r-- | tools/perf/builtin-lock.c | 148 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 4 | ||||
-rw-r--r-- | tools/perf/command-list.txt | 1 | ||||
-rw-r--r-- | tools/perf/perf-archive.sh | 3 | ||||
-rw-r--r-- | tools/perf/perf.h | 4 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 2 |
7 files changed, 182 insertions, 9 deletions
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt new file mode 100644 index 00000000000..b317102138c --- /dev/null +++ b/tools/perf/Documentation/perf-lock.txt @@ -0,0 +1,29 @@ +perf-lock(1) +============ + +NAME +---- +perf-lock - Analyze lock events + +SYNOPSIS +-------- +[verse] +'perf lock' {record|report|trace} + +DESCRIPTION +----------- +You can analyze various lock behaviours +and statistics with this 'perf lock' command. + + 'perf lock record <command>' records lock events + between start and end <command>. And this command + produces the file "perf.data" which contains tracing + results of lock events. + + 'perf lock trace' shows raw lock events. + + 'perf lock report' reports statistical data. + +SEE ALSO +-------- +linkperf:perf[1] diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index fb9ab2ad3f9..e12c844df1e 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -460,6 +460,150 @@ process_raw_event(void *data, int cpu, process_lock_release_event(data, event, cpu, timestamp, thread); } +struct raw_event_queue { + u64 timestamp; + int cpu; + void *data; + struct thread *thread; + struct list_head list; +}; + +static LIST_HEAD(raw_event_head); + +#define FLUSH_PERIOD (5 * NSEC_PER_SEC) + +static u64 flush_limit = ULLONG_MAX; +static u64 last_flush = 0; +struct raw_event_queue *last_inserted; + +static void flush_raw_event_queue(u64 limit) +{ + struct raw_event_queue *tmp, *iter; + + list_for_each_entry_safe(iter, tmp, &raw_event_head, list) { + if (iter->timestamp > limit) + return; + + if (iter == last_inserted) + last_inserted = NULL; + + process_raw_event(iter->data, iter->cpu, iter->timestamp, + iter->thread); + + last_flush = iter->timestamp; + list_del(&iter->list); + free(iter->data); + free(iter); + } +} + +static void __queue_raw_event_end(struct raw_event_queue *new) +{ + struct raw_event_queue *iter; + + list_for_each_entry_reverse(iter, &raw_event_head, list) { + if (iter->timestamp < new->timestamp) { + list_add(&new->list, &iter->list); + return; + } + } + + list_add(&new->list, &raw_event_head); +} + +static void __queue_raw_event_before(struct raw_event_queue *new, + struct raw_event_queue *iter) +{ + list_for_each_entry_continue_reverse(iter, &raw_event_head, list) { + if (iter->timestamp < new->timestamp) { + list_add(&new->list, &iter->list); + return; + } + } + + list_add(&new->list, &raw_event_head); +} + +static void __queue_raw_event_after(struct raw_event_queue *new, + struct raw_event_queue *iter) +{ + list_for_each_entry_continue(iter, &raw_event_head, list) { + if (iter->timestamp > new->timestamp) { + list_add_tail(&new->list, &iter->list); + return; + } + } + list_add_tail(&new->list, &raw_event_head); +} + +/* The queue is ordered by time */ +static void __queue_raw_event(struct raw_event_queue *new) +{ + if (!last_inserted) { + __queue_raw_event_end(new); + return; + } + + /* + * Most of the time the current event has a timestamp + * very close to the last event inserted, unless we just switched + * to another event buffer. Having a sorting based on a list and + * on the last inserted event that is close to the current one is + * probably more efficient than an rbtree based sorting. + */ + if (last_inserted->timestamp >= new->timestamp) + __queue_raw_event_before(new, last_inserted); + else + __queue_raw_event_after(new, last_inserted); +} + +static void queue_raw_event(void *data, int raw_size, int cpu, + u64 timestamp, struct thread *thread) +{ + struct raw_event_queue *new; + + if (flush_limit == ULLONG_MAX) + flush_limit = timestamp + FLUSH_PERIOD; + + if (timestamp < last_flush) { + printf("Warning: Timestamp below last timeslice flush\n"); + return; + } + + new = malloc(sizeof(*new)); + if (!new) + die("Not enough memory\n"); + + new->timestamp = timestamp; + new->cpu = cpu; + new->thread = thread; + + new->data = malloc(raw_size); + if (!new->data) + die("Not enough memory\n"); + + memcpy(new->data, data, raw_size); + + __queue_raw_event(new); + last_inserted = new; + + /* + * We want to have a slice of events covering 2 * FLUSH_PERIOD + * If FLUSH_PERIOD is big enough, it ensures every events that occured + * in the first half of the timeslice have all been buffered and there + * are none remaining (we need that because of the weakly ordered + * event recording we have). Then once we reach the 2 * FLUSH_PERIOD + * timeslice, we flush the first half to be gentle with the memory + * (the second half can still get new events in the middle, so wait + * another period to flush it) + */ + if (new->timestamp > flush_limit && + new->timestamp - flush_limit > FLUSH_PERIOD) { + flush_limit += FLUSH_PERIOD; + flush_raw_event_queue(flush_limit); + } +} + static int process_sample_event(event_t *event, struct perf_session *session) { struct thread *thread; @@ -480,7 +624,7 @@ static int process_sample_event(event_t *event, struct perf_session *session) if (profile_cpu != -1 && profile_cpu != (int) data.cpu) return 0; - process_raw_event(data.raw_data, data.cpu, data.time, thread); + queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread); return 0; } @@ -576,6 +720,7 @@ static void __cmd_report(void) setup_pager(); select_key(); read_events(); + flush_raw_event_queue(ULLONG_MAX); sort_result(); print_result(); } @@ -608,7 +753,6 @@ static const char *record_args[] = { "record", "-a", "-R", - "-M", "-f", "-m", "1024", "-c", "1", diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5db687fc13d..407041d20de 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -573,7 +573,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) return -1; - setup_pager(); + if (!script_name) + setup_pager(); session = perf_session__new(input_name, O_RDONLY, 0); if (session == NULL) @@ -608,7 +609,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) return -1; } - perf_header__read(&session->header, input); err = scripting_ops->generate_script("perf-trace"); goto out; } diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 9afcff2e3ae..db6ee94d4a8 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -18,3 +18,4 @@ perf-top mainporcelain common perf-trace mainporcelain common perf-probe mainporcelain common perf-kmem mainporcelain common +perf-lock mainporcelain common diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 45fbe2f07b1..910468e6e01 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -9,8 +9,9 @@ fi DEBUGDIR=~/.debug/ BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) +NOBUILDID=0000000000000000000000000000000000000000 -perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS +perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS if [ ! -s $BUILDIDS ] ; then echo "perf archive: no build-ids found" rm -f $BUILDIDS diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 75f941bfba9..6fb379bc1d1 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -65,9 +65,7 @@ * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. */ -#define rmb() asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \ - "sub pc, r0, #95" ::: "r0", "lr", "cc", \ - "memory") +#define rmb() ((void(*)(void))0xffff0fa0)() #define cpu_relax() asm volatile("":::"memory") #endif diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c971e81e9cb..53181dbfe4a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -508,8 +508,8 @@ void show_perf_probe_events(void) struct str_node *ent; setup_pager(); - memset(&pp, 0, sizeof(pp)); + fd = open_kprobe_events(O_RDONLY, 0); rawlist = get_trace_kprobe_event_rawlist(fd); close(fd); |