diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 6 | ||||
-rw-r--r-- | tools/perf/builtin-probe.c | 2 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 2 | ||||
-rw-r--r-- | tools/perf/builtin-top.c | 4 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 10 | ||||
-rw-r--r-- | tools/perf/config/Makefile | 14 | ||||
-rw-r--r-- | tools/perf/config/feature-checks/Makefile | 3 | ||||
-rw-r--r-- | tools/perf/config/feature-checks/test-all.c | 4 | ||||
-rw-r--r-- | tools/perf/tests/parse-events.c | 3 | ||||
-rw-r--r-- | tools/perf/ui/browser.c | 4 | ||||
-rw-r--r-- | tools/perf/ui/browsers/hists.c | 11 | ||||
-rw-r--r-- | tools/perf/util/event.c | 6 | ||||
-rw-r--r-- | tools/perf/util/evlist.c | 6 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 4 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 5 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 2 | ||||
-rw-r--r-- | tools/perf/util/target.h | 1 | ||||
-rw-r--r-- | tools/perf/util/unwind.c | 9 | ||||
-rw-r--r-- | tools/perf/util/unwind.h | 5 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 197 | ||||
-rw-r--r-- | tools/virtio/virtio_test.c | 6 | ||||
-rw-r--r-- | tools/virtio/vringh_test.c | 13 |
22 files changed, 217 insertions, 100 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 052f7c4dc00..43b42c4f4a9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -201,6 +201,12 @@ abort events and some memory events in precise mode on modern Intel CPUs. --transaction:: Record transaction flags for transaction related events. +--force-per-cpu:: +Force the use of per-cpu mmaps. By default, when tasks are specified (i.e. -p, +-t or -u options) per-thread mmaps are created. This option overrides that and +forces per-cpu mmaps. A side-effect of that is that inheritance is +automatically enabled. Add the -i option also to disable inheritance. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 89acc17cf2a..6ea9e85bdc0 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -325,6 +325,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter), OPT_CALLBACK('x', "exec", NULL, "executable|path", "target executable name or path", opt_set_target), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Disable symbol demangling"), OPT_END() }; int ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4d644fe2d5b..7c8020a3278 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -888,6 +888,8 @@ const struct option record_options[] = { "sample by weight (on special events only)"), OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, "sample transaction flags (special events only)"), + OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu, + "force the use of per-cpu mmaps"), OPT_END() }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b8f8e29db33..71e6402729a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1172,7 +1172,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) status = target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); - ui__warning("%s", errbuf); + ui__warning("%s\n", errbuf); } status = target__parse_uid(target); @@ -1180,7 +1180,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) int saved_errno = errno; target__strerror(target, status, errbuf, BUFSIZ); - ui__error("%s", errbuf); + ui__error("%s\n", errbuf); status = -saved_errno; goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6b230af940e..8be17fc462b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2112,9 +2112,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, "\n"); - printed += fprintf(fp, " msec/call\n"); - printed += fprintf(fp, " syscall calls min avg max stddev\n"); - printed += fprintf(fp, " --------------- -------- -------- -------- -------- ------\n"); + printed += fprintf(fp, " syscall calls min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); /* each int_node is a syscall */ while (inode) { @@ -2131,9 +2131,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, sc = &trace->syscalls.table[inode->i]; printed += fprintf(fp, " %-15s", sc->name); - printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f", + printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", n, min, avg); - printed += fprintf(fp, " %8.3f %6.2f\n", max, pct); + printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } inode = intlist__next(inode); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f5905f2b197..f7d11a811c7 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -142,7 +142,8 @@ CORE_FEATURE_TESTS = \ libunwind \ on-exit \ stackprotector \ - stackprotector-all + stackprotector-all \ + timerfd # # So here we detect whether test-all was rebuilt, to be able @@ -328,8 +329,14 @@ ifndef NO_LIBUNWIND msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1); NO_LIBUNWIND := 1 else - ifneq ($(feature-libunwind-debug-frame), 1) - msg := $(warning No debug_frame support found in libunwind); + ifeq ($(ARCH),arm) + $(call feature_check,libunwind-debug-frame) + ifneq ($(feature-libunwind-debug-frame), 1) + msg := $(warning No debug_frame support found in libunwind); + CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME + endif + else + # non-ARM has no dwarf_find_debug_frame() function: CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif endif @@ -405,7 +412,6 @@ else endif endif -$(call feature_check,timerfd) ifeq ($(feature-timerfd), 1) CFLAGS += -DHAVE_TIMERFD_SUPPORT else diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index e8e195f49a4..87e790017c6 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -76,6 +76,9 @@ test-libnuma: test-libunwind: $(BUILD) $(LIBUNWIND_LIBS) -lelf +test-libunwind-debug-frame: + $(BUILD) $(LIBUNWIND_LIBS) -lelf + test-libaudit: $(BUILD) -laudit diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 799865b6077..59e7a705e14 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -49,10 +49,6 @@ # include "test-libunwind.c" #undef main -#define main main_test_libunwind_debug_frame -# include "test-libunwind-debug-frame.c" -#undef main - #define main main_test_libaudit # include "test-libaudit.c" #undef main diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index ef671cd41bb..3cbd1049608 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -441,9 +441,8 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) static int test__checkevent_pmu_events(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong exclude_user", diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index bbc782e364b..cbaa7af4551 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -569,7 +569,7 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence) browser->top = browser->top + browser->top_idx + offset; break; case SEEK_END: - browser->top = browser->top + browser->nr_entries + offset; + browser->top = browser->top + browser->nr_entries - 1 + offset; break; default: return; @@ -680,7 +680,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser, if (end >= browser->top_idx + browser->height) end_row = browser->height - 1; else - end_row = end - browser->top_idx;; + end_row = end - browser->top_idx; ui_browser__gotorc(browser, row, column); SLsmg_draw_vline(end_row - row + 1); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 16848bb4c41..a440e03cd8c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1847,15 +1847,15 @@ browse_hists: switch (key) { case K_TAB: if (pos->node.next == &evlist->entries) - pos = list_entry(evlist->entries.next, struct perf_evsel, node); + pos = perf_evlist__first(evlist); else - pos = list_entry(pos->node.next, struct perf_evsel, node); + pos = perf_evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->node.prev == &evlist->entries) - pos = list_entry(evlist->entries.prev, struct perf_evsel, node); + pos = perf_evlist__last(evlist); else - pos = list_entry(pos->node.prev, struct perf_evsel, node); + pos = perf_evsel__prev(pos); goto browse_hists; case K_ESC: if (!ui_browser__dialog_yesno(&menu->b, @@ -1943,8 +1943,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, single_entry: if (nr_entries == 1) { - struct perf_evsel *first = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); const char *ev_name = perf_evsel__name(first); return perf_evsel__hists_browse(first, nr_entries, help, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6e3a846aed0..bb788c109fe 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -209,8 +209,10 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, &event->mmap.start, &event->mmap.len, prot, &event->mmap.pgoff, execname); - - if (n != 5) + /* + * Anon maps don't have the execname. + */ + if (n < 4) continue; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dc6fa3fbb18..bbc746aa571 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -819,7 +819,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->threads == NULL) return -1; - if (target__has_task(target)) + if (target->force_per_cpu) + evlist->cpus = cpu_map__new(target->cpu_list); + else if (target__has_task(target)) evlist->cpus = cpu_map__dummy_new(); else if (!target__has_cpu(target) && !target->uses_mmap) evlist->cpus = cpu_map__dummy_new(); @@ -1148,7 +1150,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) perf_evsel__name(evsel)); } - return printed + fprintf(fp, "\n");; + return printed + fprintf(fp, "\n"); } int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 18f7c188ff6..46dd4c2a41c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -645,7 +645,7 @@ void perf_evsel__config(struct perf_evsel *evsel, } } - if (target__has_cpu(&opts->target)) + if (target__has_cpu(&opts->target) || opts->target.force_per_cpu) perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) @@ -653,7 +653,7 @@ void perf_evsel__config(struct perf_evsel *evsel, if (!perf_missing_features.sample_id_all && (opts->sample_time || !opts->no_inherit || - target__has_cpu(&opts->target))) + target__has_cpu(&opts->target) || opts->target.force_per_cpu)) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f5029653dcd..1ea7c92e6e3 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -279,6 +279,11 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) return list_entry(evsel->node.next, struct perf_evsel, node); } +static inline struct perf_evsel *perf_evsel__prev(struct perf_evsel *evsel) +{ + return list_entry(evsel->node.prev, struct perf_evsel, node); +} + /** * perf_evsel__is_group_leader - Return whether given evsel is a leader event * diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0393912d803..84cdb072ac8 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1368,7 +1368,7 @@ int machine__resolve_callchain(struct machine *machine, return unwind__get_entries(unwind_entry, &callchain_cursor, machine, thread, evsel->attr.sample_regs_user, - sample); + sample, max_stack); } diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 89bab7129de..2d0c5069089 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -12,6 +12,7 @@ struct target { uid_t uid; bool system_wide; bool uses_mmap; + bool force_per_cpu; }; enum target_errno { diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 5390d0b8862..0efd5393de8 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -559,7 +559,7 @@ static unw_accessors_t accessors = { }; static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, - void *arg) + void *arg, int max_stack) { unw_addr_space_t addr_space; unw_cursor_t c; @@ -575,7 +575,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, if (ret) display_error(ret); - while (!ret && (unw_step(&c) > 0)) { + while (!ret && (unw_step(&c) > 0) && max_stack--) { unw_word_t ip; unw_get_reg(&c, UNW_REG_IP, &ip); @@ -588,7 +588,8 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, - u64 sample_uregs, struct perf_sample *data) + u64 sample_uregs, struct perf_sample *data, + int max_stack) { unw_word_t ip; struct unwind_info ui = { @@ -610,5 +611,5 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (ret) return -ENOMEM; - return get_entries(&ui, cb, arg); + return get_entries(&ui, cb, arg, max_stack); } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ec0c71a2ca2..d5966f49e22 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -18,7 +18,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, u64 sample_uregs, - struct perf_sample *data); + struct perf_sample *data, int max_stack); int unwind__arch_reg_id(int regnum); #else static inline int @@ -27,7 +27,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, struct machine *machine __maybe_unused, struct thread *thread __maybe_unused, u64 sample_uregs __maybe_unused, - struct perf_sample *data __maybe_unused) + struct perf_sample *data __maybe_unused, + int max_stack __maybe_unused) { return 0; } diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fe702076ca4..9d77f13c2d2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2,7 +2,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel turbo-capable processors. * - * Copyright (c) 2012 Intel Corporation. + * Copyright (c) 2013 Intel Corporation. * Len Brown <len.brown@intel.com> * * This program is free software; you can redistribute it and/or modify it @@ -47,6 +47,8 @@ unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_c8_c9_c10; +unsigned int do_slm_cstates; +unsigned int use_c1_residency_msr; unsigned int has_aperf; unsigned int has_epb; unsigned int units = 1000000000; /* Ghz etc */ @@ -81,6 +83,8 @@ double rapl_joule_counter_range; #define RAPL_DRAM (1 << 3) #define RAPL_PKG_PERF_STATUS (1 << 4) #define RAPL_DRAM_PERF_STATUS (1 << 5) +#define RAPL_PKG_POWER_INFO (1 << 6) +#define RAPL_CORE_POLICY (1 << 7) #define TJMAX_DEFAULT 100 #define MAX(a, b) ((a) > (b) ? (a) : (b)) @@ -96,7 +100,7 @@ struct thread_data { unsigned long long tsc; unsigned long long aperf; unsigned long long mperf; - unsigned long long c1; /* derived */ + unsigned long long c1; unsigned long long extra_msr64; unsigned long long extra_delta64; unsigned long long extra_msr32; @@ -266,7 +270,7 @@ void print_header(void) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); if (do_nhm_cstates) outp += sprintf(outp, " %%c1"); - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %%c3"); if (do_nhm_cstates) outp += sprintf(outp, " %%c6"); @@ -280,9 +284,9 @@ void print_header(void) if (do_snb_cstates) outp += sprintf(outp, " %%pc2"); - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %%pc3"); - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %%pc6"); if (do_snb_cstates) outp += sprintf(outp, " %%pc7"); @@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); @@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); - if (do_nhm_cstates) + if (do_nhm_cstates && !do_slm_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); @@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old, } - /* - * As counter collection is not atomic, - * it is possible for mperf's non-halted cycles + idle states - * to exceed TSC's all cycles: show c1 = 0% in that case. - */ - if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) - old->c1 = 0; - else { - /* normal case, derive c1 */ - old->c1 = old->tsc - old->mperf - core_delta->c3 + if (use_c1_residency_msr) { + /* + * Some models have a dedicated C1 residency MSR, + * which should be more accurate than the derivation below. + */ + } else { + /* + * As counter collection is not atomic, + * it is possible for mperf's non-halted cycles + idle states + * to exceed TSC's all cycles: show c1 = 0% in that case. + */ + if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) + old->c1 = 0; + else { + /* normal case, derive c1 */ + old->c1 = old->tsc - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; + } } if (old->mperf == 0) { @@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) return -5; + if (use_c1_residency_msr) { + if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) + return -6; + } + /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_slm_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; + } + + if (do_nhm_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; } @@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_slm_cstates) { if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) @@ -977,7 +996,7 @@ void print_verbose_header(void) ratio, bclk, ratio * bclk); get_msr(0, MSR_IA32_POWER_CTL, &msr); - fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n", + fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? "EN" : "DIS"); if (!do_ivt_turbo_ratio_limit) @@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits: switch(msr & 0x7) { case 0: - fprintf(stderr, "pc0"); + fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0"); break; case 1: - fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); + fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0"); break; case 2: - fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); + fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3"); break; case 3: - fprintf(stderr, "pc6"); + fprintf(stderr, do_slm_cstates ? "invalid" : "pc6"); break; case 4: - fprintf(stderr, "pc7"); + fprintf(stderr, do_slm_cstates ? "pc4" : "pc7"); break; case 5: - fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); + fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid"); + break; + case 6: + fprintf(stderr, do_slm_cstates ? "pc6" : "invalid"); break; case 7: - fprintf(stderr, "unlimited"); + fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited"); break; default: fprintf(stderr, "invalid"); @@ -1460,6 +1482,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x3F: /* HSW */ case 0x45: /* HSW */ case 0x46: /* HSW */ + case 0x37: /* BYT */ + case 0x4D: /* AVN */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1532,14 +1556,33 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ +double get_tdp(model) +{ + unsigned long long msr; + + if (do_rapl & RAPL_PKG_POWER_INFO) + if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) + return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + + switch (model) { + case 0x37: + case 0x4D: + return 30.0; + default: + return 135.0; + } +} + + /* * rapl_probe() * - * sets do_rapl + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units */ void rapl_probe(unsigned int family, unsigned int model) { unsigned long long msr; + unsigned int time_unit; double tdp; if (!genuine_intel) @@ -1555,11 +1598,15 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x3F: /* HSW */ case 0x45: /* HSW */ case 0x46: /* HSW */ - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; case 0x2D: case 0x3E: - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + break; + case 0x37: /* BYT */ + case 0x4D: /* AVN */ + do_rapl = RAPL_PKG | RAPL_CORES ; break; default: return; @@ -1570,19 +1617,22 @@ void rapl_probe(unsigned int family, unsigned int model) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); - rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); - rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); + if (model == 0x37) + rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; + else + rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); - /* get TDP to determine energy counter range */ - if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) - return; + time_unit = msr >> 16 & 0xF; + if (time_unit == 0) + time_unit = 0xA; - tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + rapl_time_units = 1.0 / (1 << (time_unit)); - rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + tdp = get_tdp(model); + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (verbose) - fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); + fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; } @@ -1668,7 +1718,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; int cpu; - double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; if (!do_rapl) return 0; @@ -1686,23 +1735,13 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); - local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); - local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); - - if (local_rapl_power_units != rapl_power_units) - fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); - if (local_rapl_energy_units != rapl_energy_units) - fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); - if (local_rapl_time_units != rapl_time_units) - fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); - if (verbose) { fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, - local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); + rapl_power_units, rapl_energy_units, rapl_time_units); } - if (do_rapl & RAPL_PKG) { + if (do_rapl & RAPL_PKG_POWER_INFO) { + if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -1714,6 +1753,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + } + if (do_rapl & RAPL_PKG) { + if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; @@ -1749,12 +1791,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (do_rapl & RAPL_CORES) { + if (do_rapl & RAPL_CORE_POLICY) { if (verbose) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + } + } + if (do_rapl & RAPL_CORES) { + if (verbose) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; @@ -1813,10 +1859,48 @@ int has_c8_c9_c10(unsigned int family, unsigned int model) } +int is_slm(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + switch (model) { + case 0x37: /* BYT */ + case 0x4D: /* AVN */ + return 1; + } + return 0; +} + +#define SLM_BCLK_FREQS 5 +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; + +double slm_bclk(void) +{ + unsigned long long msr = 3; + unsigned int i; + double freq; + + if (get_msr(0, MSR_FSB_FREQ, &msr)) + fprintf(stderr, "SLM BCLK: unknown\n"); + + i = msr & 0xf; + if (i >= SLM_BCLK_FREQS) { + fprintf(stderr, "SLM BCLK[%d] invalid\n", i); + msr = 3; + } + freq = slm_freq_table[i]; + + fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); + + return freq; +} + double discover_bclk(unsigned int family, unsigned int model) { if (is_snb(family, model)) return 100.00; + else if (is_slm(family, model)) + return slm_bclk(); else return 133.33; } @@ -1873,7 +1957,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); - if (target_c_local < 85 || target_c_local > 120) + if (target_c_local < 85 || target_c_local > 127) goto guess; tcc_activation_temp = target_c_local; @@ -1970,6 +2054,7 @@ void check_cpuid() do_smi = do_nhm_cstates; do_snb_cstates = is_snb(family, model); do_c8_c9_c10 = has_c8_c9_c10(family, model); + do_slm_cstates = is_slm(family, model); bclk = discover_bclk(family, model); do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); @@ -2331,7 +2416,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose) - fprintf(stderr, "turbostat v3.4 April 17, 2013" + fprintf(stderr, "turbostat v3.5 April 26, 2013" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index da7a1955828..bdb71a26ae3 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -41,13 +41,14 @@ struct vdev_info { struct vhost_memory *mem; }; -void vq_notify(struct virtqueue *vq) +bool vq_notify(struct virtqueue *vq) { struct vq_info *info = vq->priv; unsigned long long v = 1; int r; r = write(info->kick, &v, sizeof v); assert(r == sizeof v); + return true; } void vq_callback(struct virtqueue *vq) @@ -171,7 +172,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, GFP_ATOMIC); if (likely(r == 0)) { ++started; - virtqueue_kick(vq->vq); + if (unlikely(!virtqueue_kick(vq->vq)) + r = -1; } } else r = -1; diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index d053ea40c00..14a4f4cab5b 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -22,7 +22,7 @@ static u64 user_addr_offset; #define RINGSIZE 256 #define ALIGN 4096 -static void never_notify_host(struct virtqueue *vq) +static bool never_notify_host(struct virtqueue *vq) { abort(); } @@ -65,17 +65,22 @@ struct guest_virtio_device { unsigned long notifies; }; -static void parallel_notify_host(struct virtqueue *vq) +static bool parallel_notify_host(struct virtqueue *vq) { + int rc; struct guest_virtio_device *gvdev; gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev); - write(gvdev->to_host_fd, "", 1); + rc = write(gvdev->to_host_fd, "", 1); + if (rc < 0) + return false; gvdev->notifies++; + return true; } -static void no_notify_host(struct virtqueue *vq) +static bool no_notify_host(struct virtqueue *vq) { + return true; } #define NUM_XFERS (10000000) |