From 65de51f93ebf9305ec011da59c0b5fe29429d1b9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 31 Jul 2014 09:00:44 +0300 Subject: perf tools: Identify which comms are from exec For grouping together all the data from a single execution, which is needed for pairing calls and returns e.g. any outstanding calls when a process exec's will never return. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1406786474-9306-2-git-send-email-adrian.hunter@intel.com [ Remove testing if comm->exec is false before setting it to true ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 16bba9fff2c..ea3e09f6a9c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -404,11 +404,13 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event struct thread *thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); + bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC; if (dump_trace) perf_event__fprintf_comm(event, stdout); - if (thread == NULL || thread__set_comm(thread, event->comm.comm, sample->time)) { + if (thread == NULL || + __thread__set_comm(thread, event->comm.comm, sample->time, exec)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } -- cgit v1.2.3-70-g09d2 From cfe1c41405fe9a559f8b3c24c904b2bb42d4a6e8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 31 Jul 2014 09:00:45 +0300 Subject: perf machine: Add machine__thread_exec_comm() Add machine__thread_exec_comm() to return the comm that matches the last exec, if the comm_exec flag is present, or the last comm otherwise. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1406786474-9306-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 26 ++++++++++++++++++++++++++ tools/perf/util/machine.h | 4 ++++ tools/perf/util/session.c | 24 +++++++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ea3e09f6a9c..b093b93607f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -31,6 +31,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->symbol_filter = NULL; machine->id_hdr_size = 0; + machine->comm_exec = false; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -179,6 +180,19 @@ void machines__set_symbol_filter(struct machines *machines, } } +void machines__set_comm_exec(struct machines *machines, bool comm_exec) +{ + struct rb_node *nd; + + machines->host.comm_exec = comm_exec; + + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + machine->comm_exec = comm_exec; + } +} + struct machine *machines__find(struct machines *machines, pid_t pid) { struct rb_node **p = &machines->guests.rb_node; @@ -398,6 +412,15 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, return __machine__findnew_thread(machine, pid, tid, false); } +struct comm *machine__thread_exec_comm(struct machine *machine, + struct thread *thread) +{ + if (machine->comm_exec) + return thread__exec_comm(thread); + else + return thread__comm(thread); +} + int machine__process_comm_event(struct machine *machine, union perf_event *event, struct perf_sample *sample) { @@ -406,6 +429,9 @@ int machine__process_comm_event(struct 
machine *machine, union perf_event *event event->comm.tid); bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC; + if (exec) + machine->comm_exec = true; + if (dump_trace) perf_event__fprintf_comm(event, stdout); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index b972824e629..61216e02831 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -26,6 +26,7 @@ struct machine { struct rb_node rb_node; pid_t pid; u16 id_hdr_size; + bool comm_exec; char *root_dir; struct rb_root threads; struct list_head dead_threads; @@ -47,6 +48,8 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type) struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); +struct comm *machine__thread_exec_comm(struct machine *machine, + struct thread *thread); int machine__process_comm_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); @@ -88,6 +91,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size); void machines__set_symbol_filter(struct machines *machines, symbol_filter_t symbol_filter); +void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1b383bd93af..6d2d50dea1d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -67,6 +67,25 @@ static void perf_session__destroy_kernel_maps(struct perf_session *session) machines__destroy_kernel_maps(&session->machines); } +static bool perf_session__has_comm_exec(struct perf_session *session) +{ + struct perf_evsel *evsel; + + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.comm_exec) + return true; + } + + return false; +} + +static void perf_session__set_comm_exec(struct perf_session *session) +{ + bool comm_exec = perf_session__has_comm_exec(session); + + 
machines__set_comm_exec(&session->machines, comm_exec); +} + struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { @@ -90,6 +109,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out_close; perf_session__set_id_hdr_size(session); + perf_session__set_comm_exec(session); } } @@ -866,8 +886,10 @@ static s64 perf_session__process_user_event(struct perf_session *session, switch (event->header.type) { case PERF_RECORD_HEADER_ATTR: err = tool->attr(tool, event, &session->evlist); - if (err == 0) + if (err == 0) { perf_session__set_id_hdr_size(session); + perf_session__set_comm_exec(session); + } return err; case PERF_RECORD_HEADER_EVENT_TYPE: /* -- cgit v1.2.3-70-g09d2 From 4b99375b38fa137f501cfa60b70e3f0a9da39c93 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Aug 2014 22:08:38 +0300 Subject: perf machine: Rename machine__get_kernel_start_addr() method Rename machine__get_kernel_start_addr() to machine__get_running_kernel_start() so that a new function, with a similar name to the original name, can be added that gets the kernel start address from the kernel map. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1408129739-17368-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b093b93607f..37f8dc557ec 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -593,8 +593,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * Returns the name of the start symbol in *symbol_name. Pass in NULL as * symbol_name if it's not that important. 
*/ -static u64 machine__get_kernel_start_addr(struct machine *machine, - const char **symbol_name) +static u64 machine__get_running_kernel_start(struct machine *machine, + const char **symbol_name) { char filename[PATH_MAX]; int i; @@ -621,7 +621,7 @@ static u64 machine__get_kernel_start_addr(struct machine *machine, int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { enum map_type type; - u64 start = machine__get_kernel_start_addr(machine, NULL); + u64 start = machine__get_running_kernel_start(machine, NULL); for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; @@ -940,7 +940,7 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name; - u64 addr = machine__get_kernel_start_addr(machine, &name); + u64 addr = machine__get_running_kernel_start(machine, &name); if (!addr) return -1; -- cgit v1.2.3-70-g09d2 From fbe2af45f6bd27ee69fd775303c936c3af4a4807 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Aug 2014 22:08:39 +0300 Subject: perf tools: Add machine__kernel_ip() Add a function to determine if an address is in the kernel. This is based on the kernel function kernel_ip(). 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1408129739-17368-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 6 +++--- tools/perf/util/machine.c | 23 +++++++++++++++++++++++ tools/perf/util/machine.h | 17 +++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1398c83d896..ed558191c0b 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -784,9 +784,9 @@ try_again: * "[vdso]" dso, but for now lets use the old trick of looking * in the whole kernel symbol list. */ - if ((long long)al->addr < 0 && - cpumode == PERF_RECORD_MISC_USER && - machine && mg != &machine->kmaps) { + if (cpumode == PERF_RECORD_MISC_USER && machine && + mg != &machine->kmaps && + machine__kernel_ip(machine, al->addr)) { mg = &machine->kmaps; load_map = true; goto try_again; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 37f8dc557ec..e00daf0d2bd 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -32,6 +32,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->symbol_filter = NULL; machine->id_hdr_size = 0; machine->comm_exec = false; + machine->kernel_start = 0; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -1559,3 +1560,25 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; } + +int machine__get_kernel_start(struct machine *machine) +{ + struct map *map = machine__kernel_map(machine, MAP__FUNCTION); + int err = 0; + + /* + * The only addresses above 2^63 are kernel addresses of a 64-bit + * kernel. 
Note that addresses are unsigned so that on a 32-bit system + * all addresses including kernel addresses are less than 2^32. In + * that case (32-bit system), if the kernel mapping is unknown, all + * addresses will be assumed to be in user space - see + * machine__kernel_ip(). + */ + machine->kernel_start = 1ULL << 63; + if (map) { + err = map__load(map, machine->symbol_filter); + if (map->start) + machine->kernel_start = map->start; + } + return err; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 61216e02831..6a6bcc1cff5 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -36,6 +36,7 @@ struct machine { struct list_head kernel_dsos; struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; + u64 kernel_start; symbol_filter_t symbol_filter; pid_t *current_tid; }; @@ -46,6 +47,22 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type) return machine->vmlinux_maps[type]; } +int machine__get_kernel_start(struct machine *machine); + +static inline u64 machine__kernel_start(struct machine *machine) +{ + if (!machine->kernel_start) + machine__get_kernel_start(machine); + return machine->kernel_start; +} + +static inline bool machine__kernel_ip(struct machine *machine, u64 ip) +{ + u64 kernel_start = machine__kernel_start(machine); + + return ip >= kernel_start; +} + struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid); struct comm *machine__thread_exec_comm(struct machine *machine, -- cgit v1.2.3-70-g09d2 From 06b2afc0b9a26e7673856a24ab57bfb307dad394 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 20 Aug 2014 23:25:11 -0400 Subject: perf machine: Fallback to MAP__FUNCTION if daddr maps are NULL As we run "perf c2c" on more applications, we noticed we're missing significant samples from a common customer's application. 
Looking at the /proc/<pid>/maps file for the app, we see "rwxs" and "rwxp" permissions on many of the shared memory & heap regions, and on all the thread stacks. Because those regions have the "x" bit set, perf marks them with a MAP__FUNCTION type. Hence ip__resolve_data() never finds load or store events coming from them. We fixed this by re-calling thread__find_addr_location with MAP__FUNCTION in the case where map is NULL as a last ditch effort to map the sample before giving up and dropping it. Reported-by: Joe Mario Tested-by: Joe Mario Signed-off-by: Don Zickus Acked-by: Jiri Olsa Cc: Jiri Olsa Cc: Joe Mario Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1408591511-57884-1-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e00daf0d2bd..b2ec38bf211 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1314,6 +1314,16 @@ static void ip__resolve_data(struct machine *machine, struct thread *thread, thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr, &al); + if (al.map == NULL) { + /* + * some shared data regions have execute bit set which puts + * their mapping in the MAP__FUNCTION type array. + * Check there as a fallback option before dropping the sample. + */ + thread__find_addr_location(thread, machine, m, MAP__FUNCTION, addr, + &al); + } + ams->addr = addr; ams->al_addr = al.addr; ams->sym = al.sym; -- cgit v1.2.3-70-g09d2 From 8fa7d87f91479f7124142ca4ad93a37b80f8c1c0 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 29 Sep 2014 16:07:28 -0400 Subject: perf symbols: Encapsulate dsos list head into struct dsos This is a precursor patch to enable long name searching of DSOs using a rbtree. In this patch, a new dsos structure is created which contains only a list head structure for the moment.
The new dsos structure is used, in turn, in the machine structure for the user_dsos and kernel_dsos fields. Only the following 3 dsos functions are modified to accept the new dsos structure parameter instead of list_head: - dsos__add() - dsos__find() - __dsos__findnew() Signed-off-by: Waiman Long Cc: Adrian Hunter Cc: Don Zickus Cc: Douglas Hatch Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Link: http://lkml.kernel.org/r/1412021249-19201-2-git-send-email-Waiman.Long@hp.com [ Move struct dsos to dso.h to reduce the dso methods depends on machine.h ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 17 +++++++++-------- tools/perf/util/dso.h | 13 ++++++++++--- tools/perf/util/header.c | 32 ++++++++++++++++++-------------- tools/perf/util/machine.c | 24 ++++++++++++------------ tools/perf/util/machine.h | 5 +++-- tools/perf/util/probe-event.c | 3 ++- tools/perf/util/symbol-elf.c | 7 ++++++- 7 files changed, 60 insertions(+), 41 deletions(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 55e39dc1bcd..901a58fa3f2 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -851,35 +851,36 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -void dsos__add(struct list_head *head, struct dso *dso) +void dsos__add(struct dsos *dsos, struct dso *dso) { - list_add_tail(&dso->node, head); + list_add_tail(&dso->node, &dsos->head); } -struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short) +struct dso *dsos__find(const struct dsos *dsos, const char *name, + bool cmp_short) { struct dso *pos; if (cmp_short) { - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, &dsos->head, node) if (strcmp(pos->short_name, name) == 0) return pos; return NULL; } - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, &dsos->head, node) if (strcmp(pos->long_name, 
name) == 0) return pos; return NULL; } -struct dso *__dsos__findnew(struct list_head *head, const char *name) +struct dso *__dsos__findnew(struct dsos *dsos, const char *name) { - struct dso *dso = dsos__find(head, name, false); + struct dso *dso = dsos__find(dsos, name, false); if (!dso) { dso = dso__new(name); if (dso != NULL) { - dsos__add(head, dso); + dsos__add(dsos, dso); dso__set_basename(dso); } } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 5e463c0964d..b63dc98ad71 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -90,6 +90,13 @@ struct dso_cache { char data[0]; }; +/* + * DSOs are put into a list for fast iteration. + */ +struct dsos { + struct list_head head; +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; @@ -224,10 +231,10 @@ struct map *dso__new_map(const char *name); struct dso *dso__kernel_findnew(struct machine *machine, const char *name, const char *short_name, int dso_type); -void dsos__add(struct list_head *head, struct dso *dso); -struct dso *dsos__find(const struct list_head *head, const char *name, +void dsos__add(struct dsos *dsos, struct dso *dso); +struct dso *dsos__find(const struct dsos *dsos, const char *name, bool cmp_short); -struct dso *__dsos__findnew(struct list_head *head, const char *name); +struct dso *__dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 158c787ce0c..ce0de00399d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -214,11 +214,11 @@ static int machine__hit_all_dsos(struct machine *machine) { int err; - err = __dsos__hit_all(&machine->kernel_dsos); + err = __dsos__hit_all(&machine->kernel_dsos.head); if (err) return err; - return __dsos__hit_all(&machine->user_dsos); + return __dsos__hit_all(&machine->user_dsos.head); } int 
dsos__hit_all(struct perf_session *session) @@ -288,11 +288,12 @@ static int machine__write_buildid_table(struct machine *machine, int fd) umisc = PERF_RECORD_MISC_GUEST_USER; } - err = __dsos__write_buildid_table(&machine->kernel_dsos, machine, + err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine, machine->pid, kmisc, fd); if (err == 0) - err = __dsos__write_buildid_table(&machine->user_dsos, machine, - machine->pid, umisc, fd); + err = __dsos__write_buildid_table(&machine->user_dsos.head, + machine, machine->pid, umisc, + fd); return err; } @@ -455,9 +456,10 @@ static int __dsos__cache_build_ids(struct list_head *head, static int machine__cache_build_ids(struct machine *machine, const char *debugdir) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine, + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, + debugdir); return ret; } @@ -483,8 +485,10 @@ static int perf_session__cache_build_ids(struct perf_session *session) static bool machine__read_build_ids(struct machine *machine, bool with_hits) { - bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits); - ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits); + bool ret; + + ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits); + ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits); return ret; } @@ -1548,7 +1552,7 @@ static int __event_process_build_id(struct build_id_event *bev, struct perf_session *session) { int err = -1; - struct list_head *head; + struct dsos *dsos; struct machine *machine; u16 misc; struct dso *dso; @@ -1563,22 +1567,22 @@ static int __event_process_build_id(struct build_id_event *bev, switch (misc) { case PERF_RECORD_MISC_KERNEL: dso_type = DSO_TYPE_KERNEL; - head = &machine->kernel_dsos; + dsos = &machine->kernel_dsos; break; case 
PERF_RECORD_MISC_GUEST_KERNEL: dso_type = DSO_TYPE_GUEST_KERNEL; - head = &machine->kernel_dsos; + dsos = &machine->kernel_dsos; break; case PERF_RECORD_MISC_USER: case PERF_RECORD_MISC_GUEST_USER: dso_type = DSO_TYPE_USER; - head = &machine->user_dsos; + dsos = &machine->user_dsos; break; default: goto out; } - dso = __dsos__findnew(head, filename); + dso = __dsos__findnew(dsos, filename); if (dso != NULL) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b2ec38bf211..49a75ec4c47 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -17,8 +17,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { map_groups__init(&machine->kmaps); RB_CLEAR_NODE(&machine->rb_node); - INIT_LIST_HEAD(&machine->user_dsos); - INIT_LIST_HEAD(&machine->kernel_dsos); + INIT_LIST_HEAD(&machine->user_dsos.head); + INIT_LIST_HEAD(&machine->kernel_dsos.head); machine->threads = RB_ROOT; INIT_LIST_HEAD(&machine->dead_threads); @@ -72,11 +72,11 @@ out_delete: return NULL; } -static void dsos__delete(struct list_head *dsos) +static void dsos__delete(struct dsos *dsos) { struct dso *pos, *n; - list_for_each_entry_safe(pos, n, dsos, node) { + list_for_each_entry_safe(pos, n, &dsos->head, node) { list_del(&pos->node); dso__delete(pos); } @@ -477,23 +477,23 @@ struct map *machine__new_module(struct machine *machine, u64 start, size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) { struct rb_node *nd; - size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) + - __dsos__fprintf(&machines->host.user_dsos, fp); + size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) + + __dsos__fprintf(&machines->host.user_dsos.head, fp); for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret += __dsos__fprintf(&pos->kernel_dsos, fp); - ret += __dsos__fprintf(&pos->user_dsos, fp); + ret += 
__dsos__fprintf(&pos->kernel_dsos.head, fp); + ret += __dsos__fprintf(&pos->user_dsos.head, fp); } return ret; } -size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, +size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { - return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) + - __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm); + return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) + + __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm); } size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, @@ -994,7 +994,7 @@ static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; - list_for_each_entry(dso, &machine->kernel_dsos, node) { + list_for_each_entry(dso, &machine->kernel_dsos.head, node) { if (dso__is_kcore(dso)) return true; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 6a6bcc1cff5..2b651a7f5d0 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -4,6 +4,7 @@ #include #include #include "map.h" +#include "dso.h" #include "event.h" struct addr_location; @@ -32,8 +33,8 @@ struct machine { struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; - struct list_head user_dsos; - struct list_head kernel_dsos; + struct dsos user_dsos; + struct dsos kernel_dsos; struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; u64 kernel_start; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index be37b5aca33..c150ca4343e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -184,7 +184,8 @@ static struct dso *kernel_get_module_dso(const char *module) const char *vmlinux_name; if (module) { - list_for_each_entry(dso, &host_machine->kernel_dsos, node) { + list_for_each_entry(dso, &host_machine->kernel_dsos.head, + node) { if (strncmp(dso->short_name + 1, module, 
dso->short_name_len - 2) == 0) goto found; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2a92e10317c..1e23a5bfb04 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include #include "symbol.h" +#include "machine.h" #include "vdso.h" #include #include "debug.h" @@ -929,7 +930,11 @@ int dso__load_sym(struct dso *dso, struct map *map, } curr_dso->symtab_type = dso->symtab_type; map_groups__insert(kmap->kmaps, curr_map); - dsos__add(&dso->node, curr_dso); + /* + * The new DSO should go to the kernel DSOS + */ + dsos__add(&map->groups->machine->kernel_dsos, + curr_dso); dso__set_loaded(curr_dso, map->type); } else curr_dso = curr_map->dso; -- cgit v1.2.3-70-g09d2 From 4598a0a6d22fadfb7b37f2b44ee7fdcb24632fcf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 30 Sep 2014 13:36:15 -0400 Subject: perf symbols: Improve DSO long names lookup speed with rbtree With a workload that spawns and destroys many threads and processes, it was found that perf-mem could take a long time to post-process the perf data after the target workload had completed its operation. The performance bottleneck was found to be the lookup and insertion of the new DSO structures (thousands of them in this case). In a dual-socket Ivy-Bridge E7-4890 v2 machine (30-core, 60-thread), the perf profile below shows what perf was doing after the profiled AIM7 shared workload completed: - 83.94% perf libc-2.11.3.so [.] __strcmp_sse42 - __strcmp_sse42 - 99.82% map__new machine__process_mmap_event perf_session_deliver_event perf_session__process_event __perf_session__process_events cmd_record cmd_mem run_builtin main __libc_start_main - 13.17% perf perf [.]
__dsos__findnew __dsos__findnew map__new machine__process_mmap_event perf_session_deliver_event perf_session__process_event __perf_session__process_events cmd_record cmd_mem run_builtin main __libc_start_main So about 97% of CPU time was spent in the map__new() function trying to insert a new DSO entry into the DSO linked list. The whole post-processing step took about 9 minutes. The DSO structures are currently searched linearly. So the total processing time will be proportional to n^2. To overcome this performance problem, the DSO code is modified to also put the DSO structures in a RB tree sorted by its long name in addition to being in a simple linked list. With this change, the processing time will become proportional to n*log(n) which will be much quicker for large n. However, the short name will still be searched using the old linear searching method. With that patch in place, the same perf-mem post-processing step took less than 30 seconds to complete. Signed-off-by: Waiman Long Cc: Adrian Hunter Cc: Don Zickus Cc: Douglas Hatch Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Link: http://lkml.kernel.org/r/1412098575-27863-3-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 70 ++++++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/dso.h | 5 +++- tools/perf/util/machine.c | 1 + 3 files changed, 71 insertions(+), 5 deletions(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 901a58fa3f2..0247acfdfac 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -653,6 +653,65 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, return dso; } +/* + * Find a matching entry and/or link current entry to RB tree. + * Either one of the dso or name parameter must be non-NULL or the + * function will not work.
+ */ +static struct dso *dso__findlink_by_longname(struct rb_root *root, + struct dso *dso, const char *name) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + + if (!name) + name = dso->long_name; + /* + * Find node with the matching name + */ + while (*p) { + struct dso *this = rb_entry(*p, struct dso, rb_node); + int rc = strcmp(name, this->long_name); + + parent = *p; + if (rc == 0) { + /* + * In case the new DSO is a duplicate of an existing + * one, print an one-time warning & put the new entry + * at the end of the list of duplicates. + */ + if (!dso || (dso == this)) + return this; /* Find matching dso */ + /* + * The core kernel DSOs may have duplicated long name. + * In this case, the short name should be different. + * Comparing the short names to differentiate the DSOs. + */ + rc = strcmp(dso->short_name, this->short_name); + if (rc == 0) { + pr_err("Duplicated dso name: %s\n", name); + return NULL; + } + } + if (rc < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + if (dso) { + /* Add new node and rebalance tree */ + rb_link_node(&dso->rb_node, parent, p); + rb_insert_color(&dso->rb_node, root); + } + return NULL; +} + +static inline struct dso * +dso__find_by_longname(const struct rb_root *root, const char *name) +{ + return dso__findlink_by_longname((struct rb_root *)root, NULL, name); +} + void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) { if (name == NULL) @@ -755,6 +814,7 @@ struct dso *dso__new(const char *name) dso->a2l_fails = 1; dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; + RB_CLEAR_NODE(&dso->rb_node); INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); } @@ -765,6 +825,10 @@ struct dso *dso__new(const char *name) void dso__delete(struct dso *dso) { int i; + + if (!RB_EMPTY_NODE(&dso->rb_node)) + pr_err("DSO %s is still in rbtree when being deleted!\n", + dso->long_name); for (i = 0; i < MAP__NR_TYPES; ++i) 
symbols__delete(&dso->symbols[i]); @@ -854,6 +918,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) void dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); + dso__findlink_by_longname(&dsos->root, dso, NULL); } struct dso *dsos__find(const struct dsos *dsos, const char *name, @@ -867,10 +932,7 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, return pos; return NULL; } - list_for_each_entry(pos, &dsos->head, node) - if (strcmp(pos->long_name, name) == 0) - return pos; - return NULL; + return dso__find_by_longname(&dsos->root, name); } struct dso *__dsos__findnew(struct dsos *dsos, const char *name) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index b63dc98ad71..acb651acc7f 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -91,14 +91,17 @@ struct dso_cache { }; /* - * DSOs are put into a list for fast iteration. + * DSOs are put into both a list for fast iteration and rbtree for fast + * long name lookup. 
*/ struct dsos { struct list_head head; + struct rb_root root; /* rbtree root sorted by long name */ }; struct dso { struct list_head node; + struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; void *a2l; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 49a75ec4c47..b7d477fbda0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -77,6 +77,7 @@ static void dsos__delete(struct dsos *dsos) struct dso *pos, *n; list_for_each_entry_safe(pos, n, &dsos->head, node) { + RB_CLEAR_NODE(&pos->rb_node); list_del(&pos->node); dso__delete(pos); } -- cgit v1.2.3-70-g09d2 From e167f995e26249aa93708589c5eea539652351fa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Oct 2014 15:07:48 -0300 Subject: perf machine: Add missing dsos->root rbtree root initialization A segfault happens on 'perf test hists_link' because we end up using a struct machines on the stack, and then machines__init() was not initializing the newly introduced rb_root, just the existing list_head. When we introduced struct dsos, to group the two ways to store dsos, i.e. the linked list and the rbtree, we didn't turn the initialization done in: machines__init(machines->host) -> machine__init() -> INIT_LIST_HEAD into a dsos__init() to keep on initializing the list_head but _as well_ initializing the rb_root, oops. All worked because outside perf-test we probably zalloc the whole thing which ends up initializing it to NULL. So the problem looks contained to 'perf test' that uses it on the stack, etc.
Reported-by: Jiri Olsa Acked-by: Waiman Long , Cc: Adrian Hunter , Cc: Don Zickus Cc: Douglas Hatch Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Waiman Long , Link: http://lkml.kernel.org/r/20141014180353.GF3198@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/machine.c') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b7d477fbda0..34fc7c8672e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -13,12 +13,18 @@ #include #include "unwind.h" +static void dsos__init(struct dsos *dsos) +{ + INIT_LIST_HEAD(&dsos->head); + dsos->root = RB_ROOT; +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { map_groups__init(&machine->kmaps); RB_CLEAR_NODE(&machine->rb_node); - INIT_LIST_HEAD(&machine->user_dsos.head); - INIT_LIST_HEAD(&machine->kernel_dsos.head); + dsos__init(&machine->user_dsos); + dsos__init(&machine->kernel_dsos); machine->threads = RB_ROOT; INIT_LIST_HEAD(&machine->dead_threads); -- cgit v1.2.3-70-g09d2