summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/alpha/kernel/perf_event.c3
-rw-r--r--arch/arm/kernel/perf_event_v6.c4
-rw-r--r--arch/arm/kernel/perf_event_v7.c4
-rw-r--r--arch/arm/kernel/perf_event_xscale.c8
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c2
-rw-r--r--arch/powerpc/perf/core-book3s.c3
-rw-r--r--arch/powerpc/perf/core-fsl-emb.c3
-rw-r--r--arch/sparc/kernel/perf_event.c4
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/perf_event.h8
-rw-r--r--arch/x86/kernel/cpu/perf_event.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c570
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c6
-rw-r--r--include/linux/perf_event.h29
-rw-r--r--kernel/events/core.c23
-rw-r--r--kernel/sched/core.c9
-rw-r--r--tools/perf/ui/browser.c65
-rw-r--r--tools/perf/ui/browser.h6
-rw-r--r--tools/perf/ui/browsers/annotate.c115
-rw-r--r--tools/perf/util/annotate.c46
-rw-r--r--tools/perf/util/annotate.h4
24 files changed, 785 insertions, 153 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 0dae252f7a3..d821b17047e 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,6 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
idx = la_ptr;
- perf_sample_data_init(&data, 0);
for (j = 0; j < cpuc->n_events; j++) {
if (cpuc->current_idx[j] == idx)
break;
@@ -848,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
hwc = &event->hw;
alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (alpha_perf_event_set_period(event, hwc, idx)) {
if (perf_event_overflow(event, &data, regs)) {
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index b78af0cc6ef..ab627a740fa 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -489,8 +489,6 @@ armv6pmu_handle_irq(int irq_num,
*/
armv6_pmcr_write(pmcr);
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
@@ -509,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!armpmu_event_set_period(event, hwc, idx))
continue;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 00755d82e2f..d3c53606816 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1077,8 +1077,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
*/
regs = get_irq_regs();
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
@@ -1097,7 +1095,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!armpmu_event_set_period(event, hwc, idx))
continue;
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 71a21e6712f..e34e7254e65 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -248,8 +248,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
regs = get_irq_regs();
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
@@ -263,7 +261,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!armpmu_event_set_period(event, hwc, idx))
continue;
@@ -588,8 +586,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
regs = get_irq_regs();
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
@@ -603,7 +599,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!armpmu_event_set_period(event, hwc, idx))
continue;
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 811084f4e42..ab73fa2fb9b 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1325,7 +1325,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
regs = get_irq_regs();
- perf_sample_data_init(&data, 0);
+ perf_sample_data_init(&data, 0, 0);
switch (counters) {
#define HANDLE_COUNTER(n) \
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 02aee03e713..8f84bcba18d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1299,8 +1299,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 0a6d2a9d569..106c5335467 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -613,8 +613,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (perf_event_overflow(event, &data, regs))
fsl_emb_pmu_stop(event, 0);
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 28559ce5eeb..5713957dcb8 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1296,8 +1296,6 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
regs = args->regs;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/* If the PMU has the TOE IRQ enable bits, we need to do a
@@ -1321,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
if (val & (1ULL << 31))
continue;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!sparc_perf_event_set_period(event, hwc, idx))
continue;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f6..957ec87385a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b183..4e40a64315c 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
#define IBS_CAPS_OPCNT (1U<<4)
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
+#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -170,19 +171,22 @@ struct x86_pmu_capability {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* IbsFetchCtl bits/masks */
+/* ibs fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* IbsOpCtl bits */
+/* ibs op bits/masks */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_RIP_INVALID (1ULL<<38)
extern u32 get_ibs_caps(void);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e33e9cf160e..e049d6da018 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1183,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1219,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 589286f2887..65652265fff 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14..da9bcdcd985 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/ptrace.h>
#include <asm/apic.h>
@@ -16,36 +17,591 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+ u64 (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < (s64)min)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * If the hw period that triggers the sw overflow is too short
+ * we might hit the irq handler. This biases the results.
+ * Thus we shorten the next-to-last period and set the last
+ * period to the max period.
+ */
+ if (left > max) {
+ left -= max;
+ if (left > max)
+ left = max;
+ else if (left < min)
+ left = min;
+ }
+
+ *hw_period = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+ int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
+
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+ /*
+ * lower 4 bits can not be set in ibs max cnt,
+ * but allowing it in case we adjust the
+ * sample period to set a frequency.
+ */
+ return -EINVAL;
+ hwc->sample_period &= ~0x0FULL;
+ if (!hwc->sample_period)
+ hwc->sample_period = 0x10;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * If we modify hwc->sample_period, we also need to update
+ * hwc->last_period and hwc->period_left.
+ */
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
return 0;
}
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int overflow;
+
+ /* ignore lower 4 bits in min count: */
+ overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ u64 count = 0;
+
+ if (config & IBS_OP_VAL)
+ count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+ if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ count += (config & IBS_OP_CUR_CNT) >> 32;
+
+ return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 *config)
+{
+ u64 count = perf_ibs->get_count(*config);
+
+ /*
+ * Set width to 64 since we do not overflow on max width but
+ * instead on max count. In perf_ibs_set_period() we clear
+ * prev count manually on overflow.
+ */
+ while (!perf_event_try_update(event, count, 64)) {
+ rdmsrl(event->hw.config_base, *config);
+ count = perf_ibs->get_count(*config);
+ }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ config &= ~perf_ibs->cnt_mask;
+ wrmsrl(hwc->config_base, config);
+ config &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always needs to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 period;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &period);
+ set_bit(IBS_STARTED, pcpu->state);
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
+ int stopping;
+
+ stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
+
+ rdmsrl(hwc->config_base, config);
+
+ if (stopping) {
+ set_bit(IBS_STOPPING, pcpu->state);
+ perf_ibs_disable_event(perf_ibs, hwc, config);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /*
+ * Clear valid bit to not count rollovers on update, rollovers
+ * are only updated in the irq handler.
+ */
+ config &= ~perf_ibs->valid_mask;
+
+ perf_ibs_event_update(perf_ibs, event, &config);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
static int perf_ibs_add(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, PERF_EF_UPDATE);
+
+ pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+
+ .get_count = get_ibs_fetch_count,
};
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+
+ .get_count = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size, check_rip, offset_max, throttle = 0;
+ unsigned int msr;
+ u64 *buf, *config, period;
+
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+ /*
+ * Catch spurious interrupts after stopping IBS: After
+ * disabling IBS there could be still incomming NMIs
+ * with samples that even have the valid bit cleared.
+ * Mark all this NMIs as handled.
+ */
+ return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+ }
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ return 0;
+
+ config = &ibs_data.regs[0];
+ perf_ibs_event_update(perf_ibs, event, config);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+ goto out; /* no sw counter overflow */
+
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+ if (event->attr.sample_type & PERF_SAMPLE_RAW)
+ offset_max = perf_ibs->offset_max;
+ else if (check_rip)
+ offset_max = 2;
+ else
+ offset_max = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < offset_max);
+ ibs_data.size = sizeof(u64) * size;
+
+ regs = *iregs;
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
+ instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.size = sizeof(u32) + ibs_data.size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ throttle = perf_event_overflow(event, &data, &regs);
+out:
+ if (throttle)
+ perf_ibs_disable_event(perf_ibs, hwc, *config);
+ else
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+
+ return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef10..166546ec6ae 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1082,7 +1080,7 @@ again:
if (!intel_pmu_save_and_restart(event))
continue;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7d..5a3edc27f6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
ds->bts_index = ds->bts_buffer_base;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs.ip = 0;
/*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (!intel_pmu_save_and_restart(event))
return;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
/*
* We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd710..47124a73dd7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
handled += overflow;
/* event overflow for sure */
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event))
continue;
+
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ddbb6a901f6..8adf70e9e3c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1084,10 +1084,8 @@ extern void perf_pmu_unregister(struct pmu *pmu);
extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
-extern void __perf_event_task_sched_in(struct task_struct *prev,
- struct task_struct *task);
-extern void __perf_event_task_sched_out(struct task_struct *prev,
- struct task_struct *next);
+extern void __perf_event_task_sched(struct task_struct *prev,
+ struct task_struct *next);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
@@ -1132,11 +1130,14 @@ struct perf_sample_data {
struct perf_branch_stack *br_stack;
};
-static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
+static inline void perf_sample_data_init(struct perf_sample_data *data,
+ u64 addr, u64 period)
{
+ /* remaining struct members initialized in perf_prepare_sample() */
data->addr = addr;
data->raw = NULL;
data->br_stack = NULL;
+ data->period = period;
}
extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1204,20 +1205,13 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
extern struct static_key_deferred perf_sched_events;
-static inline void perf_event_task_sched_in(struct task_struct *prev,
+static inline void perf_event_task_sched(struct task_struct *prev,
struct task_struct *task)
{
- if (static_key_false(&perf_sched_events.key))
- __perf_event_task_sched_in(prev, task);
-}
-
-static inline void perf_event_task_sched_out(struct task_struct *prev,
- struct task_struct *next)
-{
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
if (static_key_false(&perf_sched_events.key))
- __perf_event_task_sched_out(prev, next);
+ __perf_event_task_sched(prev, task);
}
extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1292,11 +1286,8 @@ extern void perf_event_disable(struct perf_event *event);
extern void perf_event_task_tick(void);
#else
static inline void
-perf_event_task_sched_in(struct task_struct *prev,
- struct task_struct *task) { }
-static inline void
-perf_event_task_sched_out(struct task_struct *prev,
- struct task_struct *next) { }
+perf_event_task_sched(struct task_struct *prev,
+ struct task_struct *task) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9789a56b7d5..e82c7a1face 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
* accessing the event control register. If a NMI hits, then it will
* not restart the event.
*/
-void __perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next)
+static void __perf_event_task_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
int ctxn;
@@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *prev,
- struct task_struct *task)
+static void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev,
perf_branch_stack_sched_in(prev, task);
}
+void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next)
+{
+ __perf_event_task_sched_out(prev, next);
+ __perf_event_task_sched_in(prev, next);
+}
+
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
{
u64 frequency = event->attr.sample_freq;
@@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
if (rctx < 0)
return;
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
@@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
.data = record,
};
- perf_sample_data_init(&data, addr);
+ perf_sample_data_init(&data, addr, 0);
data.raw = &raw;
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
@@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
- perf_sample_data_init(&sample, bp->attr.bp_addr);
+ perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
if (!bp->hw.state && !perf_exclude_event(bp, regs))
perf_swevent_event(bp, 1, &sample, regs);
@@ -5344,8 +5350,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs = get_irq_regs();
if (regs && !perf_exclude_event(event, regs)) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30..5c692a0a555 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1913,7 +1913,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
+ perf_event_task_sched(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
@@ -1956,13 +1956,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_disable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(prev, current);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_enable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 32ac1165100..cde4d0f0ddb 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -593,6 +593,15 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
return row;
}
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+ u16 start, u16 end)
+{
+ SLsmg_set_char_set(1);
+ ui_browser__gotorc(browser, start, column);
+ SLsmg_draw_vline(end - start + 1);
+ SLsmg_set_char_set(0);
+}
+
void ui_browser__write_graph(struct ui_browser *browser __used, int graph)
{
SLsmg_set_char_set(1);
@@ -600,8 +609,9 @@ void ui_browser__write_graph(struct ui_browser *browser __used, int graph)
SLsmg_set_char_set(0);
}
-void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column,
- u64 start, u64 end, int start_width)
+static void __ui_browser__line_arrow_up(struct ui_browser *browser,
+ unsigned int column,
+ u64 start, u64 end)
{
unsigned int row, end_row;
@@ -612,7 +622,7 @@ void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column
ui_browser__gotorc(browser, row, column);
SLsmg_write_char(SLSMG_LLCORN_CHAR);
ui_browser__gotorc(browser, row, column + 1);
- SLsmg_draw_hline(start_width);
+ SLsmg_draw_hline(2);
if (row-- == 0)
goto out;
@@ -639,6 +649,55 @@ out:
SLsmg_set_char_set(0);
}
+static void __ui_browser__line_arrow_down(struct ui_browser *browser,
+ unsigned int column,
+ u64 start, u64 end)
+{
+ unsigned int row, end_row;
+
+ SLsmg_set_char_set(1);
+
+ if (start >= browser->top_idx) {
+ row = start - browser->top_idx;
+ ui_browser__gotorc(browser, row, column);
+ SLsmg_write_char(SLSMG_ULCORN_CHAR);
+ ui_browser__gotorc(browser, row, column + 1);
+ SLsmg_draw_hline(2);
+
+ if (row++ == 0)
+ goto out;
+ } else
+ row = 0;
+
+ if (end >= browser->top_idx + browser->height)
+ end_row = browser->height - 1;
+ else
+ end_row = end - browser->top_idx;;
+
+ ui_browser__gotorc(browser, row, column);
+ SLsmg_draw_vline(end_row - row + 1);
+
+ ui_browser__gotorc(browser, end_row, column);
+ if (end < browser->top_idx + browser->height) {
+ SLsmg_write_char(SLSMG_LLCORN_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 1);
+ SLsmg_write_char(SLSMG_HLINE_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 2);
+ SLsmg_write_char(SLSMG_RARROW_CHAR);
+ }
+out:
+ SLsmg_set_char_set(0);
+}
+
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+ u64 start, u64 end)
+{
+ if (start > end)
+ __ui_browser__line_arrow_up(browser, column, start, end);
+ else
+ __ui_browser__line_arrow_down(browser, column, start, end);
+}
+
void ui_browser__init(void)
{
int i = 0;
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 2f226cb79f6..dd96d822990 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -38,8 +38,8 @@ void ui_browser__reset_index(struct ui_browser *self);
void ui_browser__gotorc(struct ui_browser *self, int y, int x);
void ui_browser__write_graph(struct ui_browser *browser, int graph);
-void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column,
- u64 start, u64 end, int start_width);
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+ u64 start, u64 end);
void __ui_browser__show_title(struct ui_browser *browser, const char *title);
void ui_browser__show_title(struct ui_browser *browser, const char *title);
int ui_browser__show(struct ui_browser *self, const char *title,
@@ -49,6 +49,8 @@ int ui_browser__refresh(struct ui_browser *self);
int ui_browser__run(struct ui_browser *browser, int delay_secs);
void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries);
void ui_browser__handle_resize(struct ui_browser *browser);
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+ u16 start, u16 end);
int ui_browser__warning(struct ui_browser *browser, int timeout,
const char *format, ...);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 077380baa1c..f171b4627cb 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -30,8 +30,11 @@ struct annotate_browser {
int nr_entries;
bool hide_src_code;
bool use_offset;
+ bool jump_arrows;
bool searching_backwards;
- u8 offset_width;
+ u8 addr_width;
+ u8 min_addr_width;
+ u8 max_addr_width;
char search_bf[128];
};
@@ -61,47 +64,46 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
bool change_color = (!ab->hide_src_code &&
(!current_entry || (self->use_navkeypressed &&
!self->navkeypressed)));
- int width = self->width;
+ int width = self->width, printed;
+ char bf[256];
- if (dl->offset != -1) {
+ if (dl->offset != -1 && bdl->percent != 0.0) {
ui_browser__set_percent_color(self, bdl->percent, current_entry);
- slsmg_printf(" %7.2f ", bdl->percent);
+ slsmg_printf("%6.2f ", bdl->percent);
} else {
ui_browser__set_percent_color(self, 0, current_entry);
- slsmg_write_nstring(" ", 9);
+ slsmg_write_nstring(" ", 7);
}
- ui_browser__write_graph(self, SLSMG_VLINE_CHAR);
SLsmg_write_char(' ');
/* The scroll bar isn't being used */
if (!self->navkeypressed)
width += 1;
- if (dl->offset != -1 && change_color)
- ui_browser__set_color(self, HE_COLORSET_CODE);
-
if (!*dl->line)
- slsmg_write_nstring(" ", width - 10);
- else if (dl->offset == -1)
- slsmg_write_nstring(dl->line, width - 10);
- else {
- char bf[256];
+ slsmg_write_nstring(" ", width - 7);
+ else if (dl->offset == -1) {
+ printed = scnprintf(bf, sizeof(bf), "%*s ",
+ ab->addr_width, " ");
+ slsmg_write_nstring(bf, printed);
+ slsmg_write_nstring(dl->line, width - printed - 6);
+ } else {
u64 addr = dl->offset;
- int printed, color = -1;
+ int color = -1;
if (!ab->use_offset)
addr += ab->start;
if (!ab->use_offset) {
- printed = scnprintf(bf, sizeof(bf), " %" PRIx64 ":", addr);
+ printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
} else {
if (bdl->jump_target) {
- printed = scnprintf(bf, sizeof(bf), " %*" PRIx64 ":",
- ab->offset_width, addr);
+ printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+ ab->addr_width, addr);
} else {
- printed = scnprintf(bf, sizeof(bf), " %*s ",
- ab->offset_width, " ");
+ printed = scnprintf(bf, sizeof(bf), "%*s ",
+ ab->addr_width, " ");
}
}
@@ -117,12 +119,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
ui_browser__write_graph(self, fwd ? SLSMG_DARROW_CHAR :
SLSMG_UARROW_CHAR);
SLsmg_write_char(' ');
+ } else if (ins__is_call(dl->ins)) {
+ ui_browser__write_graph(self, SLSMG_RARROW_CHAR);
+ SLsmg_write_char(' ');
} else {
slsmg_write_nstring(" ", 2);
}
-
- dl->ins->ops->scnprintf(dl->ins, bf, sizeof(bf), &dl->ops,
- !ab->use_offset);
} else {
if (strcmp(dl->name, "retq")) {
slsmg_write_nstring(" ", 2);
@@ -130,68 +132,56 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
ui_browser__write_graph(self, SLSMG_LARROW_CHAR);
SLsmg_write_char(' ');
}
-
- scnprintf(bf, sizeof(bf), "%-6.6s %s", dl->name, dl->ops.raw);
}
- slsmg_write_nstring(bf, width - 12 - printed);
+ disasm_line__scnprintf(dl, bf, sizeof(bf), !ab->use_offset);
+ slsmg_write_nstring(bf, width - 10 - printed);
}
if (current_entry)
ab->selection = dl;
}
-static void annotate_browser__draw_current_loop(struct ui_browser *browser)
+static void annotate_browser__draw_current_jump(struct ui_browser *browser)
{
struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
- struct map_symbol *ms = browser->priv;
- struct symbol *sym = ms->sym;
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *cursor = ab->selection, *pos = cursor, *target;
- struct browser_disasm_line *bcursor = disasm_line__browser(cursor),
- *btarget, *bpos;
- unsigned int from, to, start_width = 2;
-
- list_for_each_entry_from(pos, &notes->src->source, node) {
- if (!pos->ins || !ins__is_jump(pos->ins) ||
- !disasm_line__has_offset(pos))
- continue;
+ struct disasm_line *cursor = ab->selection, *target;
+ struct browser_disasm_line *btarget, *bcursor;
+ unsigned int from, to;
- target = ab->offsets[pos->ops.target.offset];
- if (!target)
- continue;
+ if (!cursor->ins || !ins__is_jump(cursor->ins) ||
+ !disasm_line__has_offset(cursor))
+ return;
- btarget = disasm_line__browser(target);
- if (btarget->idx <= bcursor->idx)
- goto found;
- }
+ target = ab->offsets[cursor->ops.target.offset];
+ if (!target)
+ return;
- return;
+ bcursor = disasm_line__browser(cursor);
+ btarget = disasm_line__browser(target);
-found:
- bpos = disasm_line__browser(pos);
if (ab->hide_src_code) {
- from = bpos->idx_asm;
+ from = bcursor->idx_asm;
to = btarget->idx_asm;
} else {
- from = (u64)bpos->idx;
+ from = (u64)bcursor->idx;
to = (u64)btarget->idx;
}
ui_browser__set_color(browser, HE_COLORSET_CODE);
-
- if (!bpos->jump_target)
- start_width += ab->offset_width + 1;
-
- __ui_browser__line_arrow_up(browser, 10, from, to, start_width);
+ __ui_browser__line_arrow(browser, 9 + ab->addr_width, from, to);
}
static unsigned int annotate_browser__refresh(struct ui_browser *browser)
{
+ struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
int ret = ui_browser__list_head_refresh(browser);
- annotate_browser__draw_current_loop(browser);
+ if (ab->jump_arrows)
+ annotate_browser__draw_current_jump(browser);
+ ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+ __ui_browser__vline(browser, 7, 0, browser->height - 1);
return ret;
}
@@ -624,6 +614,13 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
case 'O':
case 'o':
self->use_offset = !self->use_offset;
+ if (self->use_offset)
+ self->addr_width = self->min_addr_width;
+ else
+ self->addr_width = self->max_addr_width;
+ continue;
+ case 'j':
+ self->jump_arrows = !self->jump_arrows;
continue;
case '/':
if (annotate_browser__search(self, delay_secs)) {
@@ -736,6 +733,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
.use_navkeypressed = true,
},
.use_offset = true,
+ .jump_arrows = true,
};
int ret = -1;
@@ -786,7 +784,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
annotate_browser__mark_jump_targets(&browser, size);
- browser.offset_width = hex_width(size);
+ browser.addr_width = browser.min_addr_width = hex_width(size);
+ browser.max_addr_width = hex_width(sym->end);
browser.b.nr_entries = browser.nr_entries;
browser.b.entries = &notes->src->source,
browser.b.width += 18; /* Percentage */
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 5eb34123f55..6b4146b40a2 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -18,6 +18,21 @@
const char *disassembler_style;
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ if (ins->ops->scnprintf)
+ return ins->ops->scnprintf(ins, bf, size, ops);
+
+ return ins__raw_scnprintf(ins, bf, size, ops);
+}
+
static int call__parse(struct ins_operands *ops)
{
char *endptr, *tok, *name;
@@ -50,11 +65,8 @@ indirect_call:
}
static int call__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, bool addrs)
+ struct ins_operands *ops)
{
- if (addrs)
- return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
-
if (ops->target.name)
return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name);
@@ -86,11 +98,8 @@ static int jump__parse(struct ins_operands *ops)
}
static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, bool addrs)
+ struct ins_operands *ops)
{
- if (addrs)
- return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
-
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
}
@@ -104,6 +113,16 @@ bool ins__is_jump(const struct ins *ins)
return ins->ops == &jump_ops;
}
+static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size,
+ struct ins_operands *ops __used)
+{
+ return scnprintf(bf, size, "%-6.6s", "nop");
+}
+
+static struct ins_ops nop_ops = {
+ .scnprintf = nop__scnprintf,
+};
+
/*
* Must be sorted by name!
*/
@@ -145,6 +164,9 @@ static struct ins instructions[] = {
{ .name = "jrcxz", .ops = &jump_ops, },
{ .name = "js", .ops = &jump_ops, },
{ .name = "jz", .ops = &jump_ops, },
+ { .name = "nop", .ops = &nop_ops, },
+ { .name = "nopl", .ops = &nop_ops, },
+ { .name = "nopw", .ops = &nop_ops, },
};
static int ins__cmp(const void *name, const void *insp)
@@ -296,6 +318,14 @@ void disasm_line__free(struct disasm_line *dl)
free(dl);
}
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
+{
+ if (raw || !dl->ins)
+ return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw);
+
+ return ins__scnprintf(dl->ins, bf, size, &dl->ops);
+}
+
static void disasm__add(struct list_head *head, struct disasm_line *line)
{
list_add_tail(&line->node, head);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 13a21f10dab..bb0a9f27165 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -22,7 +22,7 @@ struct ins_operands {
struct ins_ops {
int (*parse)(struct ins_operands *ops);
int (*scnprintf)(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, bool addrs);
+ struct ins_operands *ops);
};
struct ins {
@@ -32,6 +32,7 @@ struct ins {
bool ins__is_jump(const struct ins *ins);
bool ins__is_call(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
struct disasm_line {
struct list_head node;
@@ -49,6 +50,7 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl)
void disasm_line__free(struct disasm_line *dl);
struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos);
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
size_t disasm__fprintf(struct list_head *head, FILE *fp);
struct sym_hist {