24 files changed, 785 insertions, 153 deletions
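Most of the per-arch hunks below are one mechanical refactor: perf_sample_data_init() grows a third argument so the sampling period is set at initialization time, replacing the separate data.period assignment, and the call moves from the top of each interrupt handler into the per-event overflow path, so handlers that service several counters in one interrupt (the loops in the ARM, sparc and x86 hunks) pick up each event's own last_period. A minimal user-space sketch of the before/after calling pattern, with the struct reduced to the four members the initializer touches and the last_period value and main() invented for the demo:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/*
 * Reduced model of struct perf_sample_data: only the members that
 * perf_sample_data_init() touches; the kernel struct has many more,
 * which perf_prepare_sample() fills in later.
 */
struct perf_sample_data {
	u64 addr;
	void *raw;
	void *br_stack;
	u64 period;
};

/* New-style initializer, mirroring the include/linux/perf_event.h hunk. */
static inline void perf_sample_data_init(struct perf_sample_data *data,
					 u64 addr, u64 period)
{
	data->addr = addr;
	data->raw = NULL;
	data->br_stack = NULL;
	data->period = period;
}

int main(void)
{
	struct perf_sample_data data;
	u64 last_period = 0x1000;	/* stands in for event->hw.last_period */

	/*
	 * Old pattern, hoisted above the counter loop:
	 *	perf_sample_data_init(&data, 0);
	 *	...
	 *	data.period = event->hw.last_period;
	 * New pattern, once per overflowing event:
	 */
	perf_sample_data_init(&data, 0, last_period);

	printf("addr=%llu period=%llu\n",
	       (unsigned long long)data.addr,
	       (unsigned long long)data.period);
	return 0;
}
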
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 0dae252f7a3..d821b17047e 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -824,7 +824,6 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, idx = la_ptr; - perf_sample_data_init(&data, 0); for (j = 0; j < cpuc->n_events; j++) { if (cpuc->current_idx[j] == idx) break; @@ -848,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, hwc = &event->hw; alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (alpha_perf_event_set_period(event, hwc, idx)) { if (perf_event_overflow(event, &data, regs)) { diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index b78af0cc6ef..ab627a740fa 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -489,8 +489,6 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -509,7 +507,7 @@ armv6pmu_handle_irq(int irq_num, hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 00755d82e2f..d3c53606816 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1077,8 +1077,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -1097,7 +1095,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 71a21e6712f..e34e7254e65 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -248,8 +248,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -263,7 +261,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; @@ -588,8 +586,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -603,7 +599,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git 
a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 811084f4e42..ab73fa2fb9b 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1325,7 +1325,7 @@ static int mipsxx_pmu_handle_shared_irq(void) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); + perf_sample_data_init(&data, 0, 0); switch (counters) { #define HANDLE_COUNTER(n) \ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 02aee03e713..8f84bcba18d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1299,8 +1299,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, ~0ULL); - data.period = event->hw.last_period; + perf_sample_data_init(&data, ~0ULL, event->hw.last_period); if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 0a6d2a9d569..106c5335467 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -613,8 +613,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); if (perf_event_overflow(event, &data, regs)) fsl_emb_pmu_stop(event, 0); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 28559ce5eeb..5713957dcb8 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1296,8 +1296,6 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); /* If the PMU has the TOE IRQ enable bits, we need to do a @@ -1321,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, if (val & (1ULL << 31)) continue; - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!sparc_perf_event_set_period(event, hwc, idx)) continue; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ccb805966f6..957ec87385a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -134,6 +134,8 @@ #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) #define MSR_AMD64_IBSOPCTL 0xc0011033 #define MSR_AMD64_IBSOPRIP 0xc0011034 #define MSR_AMD64_IBSOPDATA 0xc0011035 @@ -141,8 +143,11 @@ #define MSR_AMD64_IBSOPDATA3 0xc0011037 #define MSR_AMD64_IBSDCLINAD 0xc0011038 #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSOP_REG_COUNT 7 +#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 2291895b183..4e40a64315c 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -158,6 +158,7 @@ struct x86_pmu_capability { #define IBS_CAPS_OPCNT (1U<<4) 
#define IBS_CAPS_BRNTRGT (1U<<5) #define IBS_CAPS_OPCNTEXT (1U<<6) +#define IBS_CAPS_RIPINVALIDCHK (1U<<7) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ @@ -170,19 +171,22 @@ struct x86_pmu_capability { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* IbsFetchCtl bits/masks */ +/* ibs fetch bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* IbsOpCtl bits */ +/* ibs op bits/masks */ +/* lower 4 bits of the current count are ignored: */ +#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_RIP_INVALID (1ULL<<38) extern u32 get_ibs_caps(void); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e33e9cf160e..e049d6da018 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1183,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); /* @@ -1219,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) * event overflow */ handled++; - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); if (!x86_perf_event_set_period(event)) continue; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 589286f2887..65652265fff 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event) static int amd_pmu_hw_config(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); + int ret; + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOENT; + + ret = x86_pmu_hw_config(event); if (ret) return ret; diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 3b8a2d30d14..da9bcdcd985 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -9,6 +9,7 @@ #include <linux/perf_event.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/ptrace.h> #include <asm/apic.h> @@ -16,36 +17,591 @@ static u32 ibs_caps; #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) -static struct pmu perf_ibs; +#include <linux/kprobes.h> +#include <linux/hardirq.h> + +#include <asm/nmi.h> + +#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) +#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT + +enum ibs_states { + IBS_ENABLED = 0, + IBS_STARTED = 1, + IBS_STOPPING = 2, + + IBS_MAX_STATES, +}; + +struct cpu_perf_ibs { + struct perf_event *event; + unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; +}; + +struct perf_ibs { + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + struct cpu_perf_ibs __percpu *pcpu; + u64 (*get_count)(u64 config); +}; + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; + +static int +perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 
*hw_period) +{ + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int overflow = 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left < (s64)min)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + /* + * If the hw period that triggers the sw overflow is too short + * we might hit the irq handler. This biases the results. + * Thus we shorten the next-to-last period and set the last + * period to the max period. + */ + if (left > max) { + left -= max; + if (left > max) + left = max; + else if (left < min) + left = min; + } + + *hw_period = (u64)left; + + return overflow; +} + +static int +perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - width; + u64 prev_raw_count; + u64 delta; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ + prev_raw_count = local64_read(&hwc->prev_count); + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + return 0; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return 1; +} + +static struct perf_ibs perf_ibs_fetch; +static struct perf_ibs perf_ibs_op; + +static struct perf_ibs *get_ibs_pmu(int type) +{ + if (perf_ibs_fetch.pmu.type == type) + return &perf_ibs_fetch; + if (perf_ibs_op.pmu.type == type) + return &perf_ibs_op; + return NULL; +} + +/* + * Use IBS for precise event sampling: + * + * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count + * perf record -a -e r076:p ... # same as -e cpu-cycles:p + * perf record -a -e r0C1:p ... # use ibs op counting micro-ops + * + * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, + * MSRC001_1033) is used to select either cycle or micro-ops counting + * mode. + * + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. 
+ * + */ +static int perf_ibs_precise_event(struct perf_event *event, u64 *config) +{ + switch (event->attr.precise_ip) { + case 0: + return -ENOENT; + case 1: + case 2: + break; + default: + return -EOPNOTSUPP; + } + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + switch (event->attr.config) { + case PERF_COUNT_HW_CPU_CYCLES: + *config = 0; + return 0; + } + break; + case PERF_TYPE_RAW: + switch (event->attr.config) { + case 0x0076: + *config = 0; + return 0; + case 0x00C1: + *config = IBS_OP_CNT_CTL; + return 0; + } + break; + default: + return -ENOENT; + } + + return -EOPNOTSUPP; +} static int perf_ibs_init(struct perf_event *event) { - if (perf_ibs.type != event->attr.type) + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs; + u64 max_cnt, config; + int ret; + + perf_ibs = get_ibs_pmu(event->attr.type); + if (perf_ibs) { + config = event->attr.config; + } else { + perf_ibs = &perf_ibs_op; + ret = perf_ibs_precise_event(event, &config); + if (ret) + return ret; + } + + if (event->pmu != &perf_ibs->pmu) return -ENOENT; + + if (config & ~perf_ibs->config_mask) + return -EINVAL; + + if (hwc->sample_period) { + if (config & perf_ibs->cnt_mask) + /* raw max_cnt may not be set */ + return -EINVAL; + if (!event->attr.sample_freq && hwc->sample_period & 0x0f) + /* + * lower 4 bits cannot be set in ibs max cnt, + * but we allow it in case we adjust the + * sample period to set a frequency. + */ + return -EINVAL; + hwc->sample_period &= ~0x0FULL; + if (!hwc->sample_period) + hwc->sample_period = 0x10; + } else { + max_cnt = config & perf_ibs->cnt_mask; + config &= ~perf_ibs->cnt_mask; + event->attr.sample_period = max_cnt << 4; + hwc->sample_period = event->attr.sample_period; + } + + if (!hwc->sample_period) + return -EINVAL; + + /* + * If we modify hwc->sample_period, we also need to update + * hwc->last_period and hwc->period_left. + */ + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + + hwc->config_base = perf_ibs->msr; + hwc->config = config; + return 0; } +static int perf_ibs_set_period(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 *period) +{ + int overflow; + + /* ignore lower 4 bits in min count: */ + overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + local64_set(&hwc->prev_count, 0); + + return overflow; +} + +static u64 get_ibs_fetch_count(u64 config) +{ + return (config & IBS_FETCH_CNT) >> 12; +} + +static u64 get_ibs_op_count(u64 config) +{ + u64 count = 0; + + if (config & IBS_OP_VAL) + count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ + + if (ibs_caps & IBS_CAPS_RDWROPCNT) + count += (config & IBS_OP_CUR_CNT) >> 32; + + return count; +} + +static void +perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, + u64 *config) +{ + u64 count = perf_ibs->get_count(*config); + + /* + * Set width to 64 since we do not overflow on max width but + * instead on max count. In perf_ibs_set_period() we clear + * prev count manually on overflow. + */ + while (!perf_event_try_update(event, count, 64)) { + rdmsrl(event->hw.config_base, *config); + count = perf_ibs->get_count(*config); + } +} + +static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); +} + +/* + * Erratum #420 Instruction-Based Sampling Engine May Generate + * Interrupt that Cannot Be Cleared: + * + * Must clear counter mask first, then clear the enable bit.
See + * Revision Guide for AMD Family 10h Processors, Publication #41322. + */ +static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + config &= ~perf_ibs->cnt_mask; + wrmsrl(hwc->config_base, config); + config &= ~perf_ibs->enable_mask; + wrmsrl(hwc->config_base, config); +} + +/* + * We cannot restore the ibs pmu state, so we always needs to update + * the event while stopping it and then reset the state when starting + * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in + * perf_ibs_start()/perf_ibs_stop() and instead always do it. + */ +static void perf_ibs_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + u64 period; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + perf_ibs_set_period(perf_ibs, hwc, &period); + set_bit(IBS_STARTED, pcpu->state); + perf_ibs_enable_event(perf_ibs, hwc, period >> 4); + + perf_event_update_userpage(event); +} + +static void perf_ibs_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + u64 config; + int stopping; + + stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); + + if (!stopping && (hwc->state & PERF_HES_UPTODATE)) + return; + + rdmsrl(hwc->config_base, config); + + if (stopping) { + set_bit(IBS_STOPPING, pcpu->state); + perf_ibs_disable_event(perf_ibs, hwc, config); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* + * Clear valid bit to not count rollovers on update, rollovers + * are only updated in the irq handler. 
+ */ + config &= ~perf_ibs->valid_mask; + + perf_ibs_event_update(perf_ibs, event, &config); + hwc->state |= PERF_HES_UPTODATE; +} + static int perf_ibs_add(struct perf_event *event, int flags) { + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + + if (test_and_set_bit(IBS_ENABLED, pcpu->state)) + return -ENOSPC; + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + pcpu->event = event; + + if (flags & PERF_EF_START) + perf_ibs_start(event, PERF_EF_RELOAD); + return 0; } static void perf_ibs_del(struct perf_event *event, int flags) { + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + + if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) + return; + + perf_ibs_stop(event, PERF_EF_UPDATE); + + pcpu->event = NULL; + + perf_event_update_userpage(event); } -static struct pmu perf_ibs = { - .event_init= perf_ibs_init, - .add= perf_ibs_add, - .del= perf_ibs_del, +static void perf_ibs_read(struct perf_event *event) { } + +static struct perf_ibs perf_ibs_fetch = { + .pmu = { + .task_ctx_nr = perf_invalid_context, + + .event_init = perf_ibs_init, + .add = perf_ibs_add, + .del = perf_ibs_del, + .start = perf_ibs_start, + .stop = perf_ibs_stop, + .read = perf_ibs_read, + }, + .msr = MSR_AMD64_IBSFETCHCTL, + .config_mask = IBS_FETCH_CONFIG_MASK, + .cnt_mask = IBS_FETCH_MAX_CNT, + .enable_mask = IBS_FETCH_ENABLE, + .valid_mask = IBS_FETCH_VAL, + .max_period = IBS_FETCH_MAX_CNT << 4, + .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, + .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, + + .get_count = get_ibs_fetch_count, }; +static struct perf_ibs perf_ibs_op = { + .pmu = { + .task_ctx_nr = perf_invalid_context, + + .event_init = perf_ibs_init, + .add = perf_ibs_add, + .del = perf_ibs_del, + .start = perf_ibs_start, + .stop = perf_ibs_stop, + .read = perf_ibs_read, + }, + .msr = MSR_AMD64_IBSOPCTL, + .config_mask = IBS_OP_CONFIG_MASK, + .cnt_mask = IBS_OP_MAX_CNT, + .enable_mask = IBS_OP_ENABLE, + .valid_mask = IBS_OP_VAL, + .max_period = IBS_OP_MAX_CNT << 4, + .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, + .offset_max = MSR_AMD64_IBSOP_REG_COUNT, + + .get_count = get_ibs_op_count, +}; + +static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) +{ + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + struct perf_event *event = pcpu->event; + struct hw_perf_event *hwc = &event->hw; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + struct perf_ibs_data ibs_data; + int offset, size, check_rip, offset_max, throttle = 0; + unsigned int msr; + u64 *buf, *config, period; + + if (!test_bit(IBS_STARTED, pcpu->state)) { + /* + * Catch spurious interrupts after stopping IBS: After + * disabling IBS there could still be incoming NMIs + * with samples that even have the valid bit cleared. + * Mark all these NMIs as handled. + */ + return test_and_clear_bit(IBS_STOPPING, pcpu->state) ?
1 : 0; + } + + msr = hwc->config_base; + buf = ibs_data.regs; + rdmsrl(msr, *buf); + if (!(*buf++ & perf_ibs->valid_mask)) + return 0; + + config = &ibs_data.regs[0]; + perf_ibs_event_update(perf_ibs, event, config); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!perf_ibs_set_period(perf_ibs, hwc, &period)) + goto out; /* no sw counter overflow */ + + ibs_data.caps = ibs_caps; + size = 1; + offset = 1; + check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); + if (event->attr.sample_type & PERF_SAMPLE_RAW) + offset_max = perf_ibs->offset_max; + else if (check_rip) + offset_max = 2; + else + offset_max = 1; + do { + rdmsrl(msr + offset, *buf++); + size++; + offset = find_next_bit(perf_ibs->offset_mask, + perf_ibs->offset_max, + offset + 1); + } while (offset < offset_max); + ibs_data.size = sizeof(u64) * size; + + regs = *iregs; + if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { + regs.flags &= ~PERF_EFLAGS_EXACT; + } else { + instruction_pointer_set(®s, ibs_data.regs[1]); + regs.flags |= PERF_EFLAGS_EXACT; + } + + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.size = sizeof(u32) + ibs_data.size; + raw.data = ibs_data.data; + data.raw = &raw; + } + + throttle = perf_event_overflow(event, &data, ®s); +out: + if (throttle) + perf_ibs_disable_event(perf_ibs, hwc, *config); + else + perf_ibs_enable_event(perf_ibs, hwc, period >> 4); + + perf_event_update_userpage(event); + + return 1; +} + +static int __kprobes +perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) +{ + int handled = 0; + + handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); + handled += perf_ibs_handle_irq(&perf_ibs_op, regs); + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) +{ + struct cpu_perf_ibs __percpu *pcpu; + int ret; + + pcpu = alloc_percpu(struct cpu_perf_ibs); + if (!pcpu) + return -ENOMEM; + + perf_ibs->pcpu = pcpu; + + ret = perf_pmu_register(&perf_ibs->pmu, name, -1); + if (ret) { + perf_ibs->pcpu = NULL; + free_percpu(pcpu); + } + + return ret; +} + static __init int perf_event_ibs_init(void) { if (!ibs_caps) return -ENODEV; /* ibs not supported by the cpu */ - perf_pmu_register(&perf_ibs, "ibs", -1); + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + if (ibs_caps & IBS_CAPS_OPCNT) + perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; + perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 26b3e2fef10..166546ec6ae 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 status; int handled; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); /* @@ -1082,7 +1080,7 @@ again: if (!intel_pmu_save_and_restart(event)) continue; - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 7f64df19e7d..5a3edc27f6e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void) ds->bts_index = 
ds->bts_buffer_base; - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); regs.ip = 0; /* @@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, if (!intel_pmu_save_and_restart(event)) return; - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); /* * We use the interrupt regs as a base because the PEBS record diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index a2dfacfd710..47124a73dd7 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) handled += overflow; /* event overflow for sure */ - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!x86_perf_event_set_period(event)) continue; + + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ddbb6a901f6..8adf70e9e3c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1084,10 +1084,8 @@ extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); -extern void __perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task); -extern void __perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next); +extern void __perf_event_task_sched(struct task_struct *prev, + struct task_struct *next); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -1132,11 +1130,14 @@ struct perf_sample_data { struct perf_branch_stack *br_stack; }; -static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +static inline void perf_sample_data_init(struct perf_sample_data *data, + u64 addr, u64 period) { + /* remaining struct members initialized in perf_prepare_sample() */ data->addr = addr; data->raw = NULL; data->br_stack = NULL; + data->period = period; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1204,20 +1205,13 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) extern struct static_key_deferred perf_sched_events; -static inline void perf_event_task_sched_in(struct task_struct *prev, +static inline void perf_event_task_sched(struct task_struct *prev, struct task_struct *task) { - if (static_key_false(&perf_sched_events.key)) - __perf_event_task_sched_in(prev, task); -} - -static inline void perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next) -{ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); if (static_key_false(&perf_sched_events.key)) - __perf_event_task_sched_out(prev, next); + __perf_event_task_sched(prev, task); } extern void perf_event_mmap(struct vm_area_struct *vma); @@ -1292,11 +1286,8 @@ extern void perf_event_disable(struct perf_event *event); extern void perf_event_task_tick(void); #else static inline void -perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task) { } -static 
inline void -perf_event_task_sched_out(struct task_struct *prev, - struct task_struct *next) { } +perf_event_task_sched(struct task_struct *prev, + struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } diff --git a/kernel/events/core.c b/kernel/events/core.c index 9789a56b7d5..e82c7a1face 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, * accessing the event control register. If a NMI hits, then it will * not restart the event. */ -void __perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +static void __perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) { int ctxn; @@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -void __perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task) +static void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2305,6 +2305,12 @@ void __perf_event_task_sched_in(struct task_struct *prev, perf_branch_stack_sched_in(prev, task); } +void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next) +{ + __perf_event_task_sched_out(prev, next); + __perf_event_task_sched_in(prev, next); +} + static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; @@ -4957,7 +4963,7 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) if (rctx < 0) return; - perf_sample_data_init(&data, addr); + perf_sample_data_init(&data, addr, 0); do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); @@ -5215,7 +5221,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, .data = record, }; - perf_sample_data_init(&data, addr); + perf_sample_data_init(&data, addr, 0); data.raw = &raw; hlist_for_each_entry_rcu(event, node, head, hlist_entry) { @@ -5318,7 +5324,7 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; - perf_sample_data_init(&sample, bp->attr.bp_addr); + perf_sample_data_init(&sample, bp->attr.bp_addr, 0); if (!bp->hw.state && !perf_exclude_event(bp, regs)) perf_swevent_event(bp, 1, &sample, regs); @@ -5344,8 +5350,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event->pmu->read(event); - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); regs = get_irq_regs(); if (regs && !perf_exclude_event(event, regs)) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4603b9d8f30..5c692a0a555 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1913,7 +1913,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next); + perf_event_task_sched(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); @@ -1956,13 +1956,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; 
finish_arch_switch(prev); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_disable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - perf_event_task_sched_in(prev, current); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_enable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 32ac1165100..cde4d0f0ddb 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -593,6 +593,15 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser) return row; } +void __ui_browser__vline(struct ui_browser *browser, unsigned int column, + u16 start, u16 end) +{ + SLsmg_set_char_set(1); + ui_browser__gotorc(browser, start, column); + SLsmg_draw_vline(end - start + 1); + SLsmg_set_char_set(0); +} + void ui_browser__write_graph(struct ui_browser *browser __used, int graph) { SLsmg_set_char_set(1); @@ -600,8 +609,9 @@ void ui_browser__write_graph(struct ui_browser *browser __used, int graph) SLsmg_set_char_set(0); } -void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column, - u64 start, u64 end, int start_width) +static void __ui_browser__line_arrow_up(struct ui_browser *browser, + unsigned int column, + u64 start, u64 end) { unsigned int row, end_row; @@ -612,7 +622,7 @@ void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column ui_browser__gotorc(browser, row, column); SLsmg_write_char(SLSMG_LLCORN_CHAR); ui_browser__gotorc(browser, row, column + 1); - SLsmg_draw_hline(start_width); + SLsmg_draw_hline(2); if (row-- == 0) goto out; @@ -639,6 +649,55 @@ out: SLsmg_set_char_set(0); } +static void __ui_browser__line_arrow_down(struct ui_browser *browser, + unsigned int column, + u64 start, u64 end) +{ + unsigned int row, end_row; + + SLsmg_set_char_set(1); + + if (start >= browser->top_idx) { + row = start - browser->top_idx; + ui_browser__gotorc(browser, row, column); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, row, column + 1); + SLsmg_draw_hline(2); + + if (row++ == 0) + goto out; + } else + row = 0; + + if (end >= browser->top_idx + browser->height) + end_row = browser->height - 1; + else + end_row = end - browser->top_idx; + + ui_browser__gotorc(browser, row, column); + SLsmg_draw_vline(end_row - row + 1); + + ui_browser__gotorc(browser, end_row, column); + if (end < browser->top_idx + browser->height) { + SLsmg_write_char(SLSMG_LLCORN_CHAR); + ui_browser__gotorc(browser, end_row, column + 1); + SLsmg_write_char(SLSMG_HLINE_CHAR); + ui_browser__gotorc(browser, end_row, column + 2); + SLsmg_write_char(SLSMG_RARROW_CHAR); + } +out: + SLsmg_set_char_set(0); +} + +void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, + u64 start, u64 end) +{ + if (start > end) + __ui_browser__line_arrow_up(browser, column, start, end); + else + __ui_browser__line_arrow_down(browser, column, start, end); +} + void ui_browser__init(void) { int i = 0; diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 2f226cb79f6..dd96d822990 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -38,8 +38,8 @@ void ui_browser__reset_index(struct ui_browser *self); void ui_browser__gotorc(struct ui_browser *self, int y, int x); void ui_browser__write_graph(struct ui_browser *browser, int graph); -void __ui_browser__line_arrow_up(struct ui_browser *browser, unsigned int column, - u64 start, u64 end, int start_width); +void __ui_browser__line_arrow(struct ui_browser
*browser, unsigned int column, + u64 start, u64 end); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *self, const char *title, @@ -49,6 +49,8 @@ int ui_browser__refresh(struct ui_browser *self); int ui_browser__run(struct ui_browser *browser, int delay_secs); void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries); void ui_browser__handle_resize(struct ui_browser *browser); +void __ui_browser__vline(struct ui_browser *browser, unsigned int column, + u16 start, u16 end); int ui_browser__warning(struct ui_browser *browser, int timeout, const char *format, ...); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 077380baa1c..f171b4627cb 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -30,8 +30,11 @@ struct annotate_browser { int nr_entries; bool hide_src_code; bool use_offset; + bool jump_arrows; bool searching_backwards; - u8 offset_width; + u8 addr_width; + u8 min_addr_width; + u8 max_addr_width; char search_bf[128]; }; @@ -61,47 +64,46 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro bool change_color = (!ab->hide_src_code && (!current_entry || (self->use_navkeypressed && !self->navkeypressed))); - int width = self->width; + int width = self->width, printed; + char bf[256]; - if (dl->offset != -1) { + if (dl->offset != -1 && bdl->percent != 0.0) { ui_browser__set_percent_color(self, bdl->percent, current_entry); - slsmg_printf(" %7.2f ", bdl->percent); + slsmg_printf("%6.2f ", bdl->percent); } else { ui_browser__set_percent_color(self, 0, current_entry); - slsmg_write_nstring(" ", 9); + slsmg_write_nstring(" ", 7); } - ui_browser__write_graph(self, SLSMG_VLINE_CHAR); SLsmg_write_char(' '); /* The scroll bar isn't being used */ if (!self->navkeypressed) width += 1; - if (dl->offset != -1 && change_color) - ui_browser__set_color(self, HE_COLORSET_CODE); - if (!*dl->line) - slsmg_write_nstring(" ", width - 10); - else if (dl->offset == -1) - slsmg_write_nstring(dl->line, width - 10); - else { - char bf[256]; + slsmg_write_nstring(" ", width - 7); + else if (dl->offset == -1) { + printed = scnprintf(bf, sizeof(bf), "%*s ", + ab->addr_width, " "); + slsmg_write_nstring(bf, printed); + slsmg_write_nstring(dl->line, width - printed - 6); + } else { u64 addr = dl->offset; - int printed, color = -1; + int color = -1; if (!ab->use_offset) addr += ab->start; if (!ab->use_offset) { - printed = scnprintf(bf, sizeof(bf), " %" PRIx64 ":", addr); + printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { if (bdl->jump_target) { - printed = scnprintf(bf, sizeof(bf), " %*" PRIx64 ":", - ab->offset_width, addr); + printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", + ab->addr_width, addr); } else { - printed = scnprintf(bf, sizeof(bf), " %*s ", - ab->offset_width, " "); + printed = scnprintf(bf, sizeof(bf), "%*s ", + ab->addr_width, " "); } } @@ -117,12 +119,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro ui_browser__write_graph(self, fwd ? 
SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); SLsmg_write_char(' '); + } else if (ins__is_call(dl->ins)) { + ui_browser__write_graph(self, SLSMG_RARROW_CHAR); + SLsmg_write_char(' '); } else { slsmg_write_nstring(" ", 2); } - - dl->ins->ops->scnprintf(dl->ins, bf, sizeof(bf), &dl->ops, - !ab->use_offset); } else { if (strcmp(dl->name, "retq")) { slsmg_write_nstring(" ", 2); @@ -130,68 +132,56 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro ui_browser__write_graph(self, SLSMG_LARROW_CHAR); SLsmg_write_char(' '); } - - scnprintf(bf, sizeof(bf), "%-6.6s %s", dl->name, dl->ops.raw); } - slsmg_write_nstring(bf, width - 12 - printed); + disasm_line__scnprintf(dl, bf, sizeof(bf), !ab->use_offset); + slsmg_write_nstring(bf, width - 10 - printed); } if (current_entry) ab->selection = dl; } -static void annotate_browser__draw_current_loop(struct ui_browser *browser) +static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct map_symbol *ms = browser->priv; - struct symbol *sym = ms->sym; - struct annotation *notes = symbol__annotation(sym); - struct disasm_line *cursor = ab->selection, *pos = cursor, *target; - struct browser_disasm_line *bcursor = disasm_line__browser(cursor), - *btarget, *bpos; - unsigned int from, to, start_width = 2; - - list_for_each_entry_from(pos, ¬es->src->source, node) { - if (!pos->ins || !ins__is_jump(pos->ins) || - !disasm_line__has_offset(pos)) - continue; + struct disasm_line *cursor = ab->selection, *target; + struct browser_disasm_line *btarget, *bcursor; + unsigned int from, to; - target = ab->offsets[pos->ops.target.offset]; - if (!target) - continue; + if (!cursor->ins || !ins__is_jump(cursor->ins) || + !disasm_line__has_offset(cursor)) + return; - btarget = disasm_line__browser(target); - if (btarget->idx <= bcursor->idx) - goto found; - } + target = ab->offsets[cursor->ops.target.offset]; + if (!target) + return; - return; + bcursor = disasm_line__browser(cursor); + btarget = disasm_line__browser(target); -found: - bpos = disasm_line__browser(pos); if (ab->hide_src_code) { - from = bpos->idx_asm; + from = bcursor->idx_asm; to = btarget->idx_asm; } else { - from = (u64)bpos->idx; + from = (u64)bcursor->idx; to = (u64)btarget->idx; } ui_browser__set_color(browser, HE_COLORSET_CODE); - - if (!bpos->jump_target) - start_width += ab->offset_width + 1; - - __ui_browser__line_arrow_up(browser, 10, from, to, start_width); + __ui_browser__line_arrow(browser, 9 + ab->addr_width, from, to); } static unsigned int annotate_browser__refresh(struct ui_browser *browser) { + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); int ret = ui_browser__list_head_refresh(browser); - annotate_browser__draw_current_loop(browser); + if (ab->jump_arrows) + annotate_browser__draw_current_jump(browser); + ui_browser__set_color(browser, HE_COLORSET_NORMAL); + __ui_browser__vline(browser, 7, 0, browser->height - 1); return ret; } @@ -624,6 +614,13 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, case 'O': case 'o': self->use_offset = !self->use_offset; + if (self->use_offset) + self->addr_width = self->min_addr_width; + else + self->addr_width = self->max_addr_width; + continue; + case 'j': + self->jump_arrows = !self->jump_arrows; continue; case '/': if (annotate_browser__search(self, delay_secs)) { @@ -736,6 +733,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 
.use_navkeypressed = true, }, .use_offset = true, + .jump_arrows = true, }; int ret = -1; @@ -786,7 +784,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, annotate_browser__mark_jump_targets(&browser, size); - browser.offset_width = hex_width(size); + browser.addr_width = browser.min_addr_width = hex_width(size); + browser.max_addr_width = hex_width(sym->end); browser.b.nr_entries = browser.nr_entries; browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5eb34123f55..6b4146b40a2 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -18,6 +18,21 @@ const char *disassembler_style; +static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); +} + +int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops) +{ + if (ins->ops->scnprintf) + return ins->ops->scnprintf(ins, bf, size, ops); + + return ins__raw_scnprintf(ins, bf, size, ops); +} + static int call__parse(struct ins_operands *ops) { char *endptr, *tok, *name; @@ -50,11 +65,8 @@ indirect_call: } static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, bool addrs) + struct ins_operands *ops) { - if (addrs) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); - if (ops->target.name) return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); @@ -86,11 +98,8 @@ static int jump__parse(struct ins_operands *ops) } static int jump__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, bool addrs) + struct ins_operands *ops) { - if (addrs) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); - return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); } @@ -104,6 +113,16 @@ bool ins__is_jump(const struct ins *ins) return ins->ops == &jump_ops; } +static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size, + struct ins_operands *ops __used) +{ + return scnprintf(bf, size, "%-6.6s", "nop"); +} + +static struct ins_ops nop_ops = { + .scnprintf = nop__scnprintf, +}; + /* * Must be sorted by name! 
*/ @@ -145,6 +164,9 @@ static struct ins instructions[] = { { .name = "jrcxz", .ops = &jump_ops, }, { .name = "js", .ops = &jump_ops, }, { .name = "jz", .ops = &jump_ops, }, + { .name = "nop", .ops = &nop_ops, }, + { .name = "nopl", .ops = &nop_ops, }, + { .name = "nopw", .ops = &nop_ops, }, }; static int ins__cmp(const void *name, const void *insp) @@ -296,6 +318,14 @@ void disasm_line__free(struct disasm_line *dl) free(dl); } +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) +{ + if (raw || !dl->ins) + return scnprintf(bf, size, "%-6.6s %s", dl->name, dl->ops.raw); + + return ins__scnprintf(dl->ins, bf, size, &dl->ops); +} + static void disasm__add(struct list_head *head, struct disasm_line *line) { list_add_tail(&line->node, head); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 13a21f10dab..bb0a9f27165 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -22,7 +22,7 @@ struct ins_operands { struct ins_ops { int (*parse)(struct ins_operands *ops); int (*scnprintf)(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, bool addrs); + struct ins_operands *ops); }; struct ins { @@ -32,6 +32,7 @@ struct ins { bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); +int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); struct disasm_line { struct list_head node; @@ -49,6 +50,7 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl) void disasm_line__free(struct disasm_line *dl); struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); struct sym_hist {
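The annotate changes above hang off a small dispatch-with-fallback pattern: disasm_line__scnprintf() prints the raw objdump text when a line has no parsed instruction or when raw output is requested, and ins__scnprintf() falls back to ins__raw_scnprintf() when an instruction's ops table supplies no scnprintf method; that is why the new nop_ops can suppress operands with a single one-function table. A standalone sketch of that chain, with the structs trimmed to what the demo needs, snprintf standing in for the kernel-style scnprintf (their return-value semantics differ, which does not matter here), and plain_ops plus the sample instructions invented for illustration:

#include <stdio.h>
#include <stddef.h>

struct ins;

struct ins_operands {
	const char *raw;	/* operand text as emitted by objdump */
};

struct ins_ops {
	/* scnprintf may be NULL; ins__scnprintf() then falls back to the
	 * raw "name operands" form, as in the annotate.c hunk. */
	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
			 struct ins_operands *ops);
};

struct ins {
	const char *name;
	struct ins_ops *ops;
};

static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
			      struct ins_operands *ops)
{
	return snprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
}

static int ins__scnprintf(struct ins *ins, char *bf, size_t size,
			  struct ins_operands *ops)
{
	if (ins->ops->scnprintf)
		return ins->ops->scnprintf(ins, bf, size, ops);

	return ins__raw_scnprintf(ins, bf, size, ops);
}

/* nop_ops only overrides the formatter: operands are dropped entirely. */
static int nop__scnprintf(struct ins *ins, char *bf, size_t size,
			  struct ins_operands *ops)
{
	(void)ins;
	(void)ops;
	return snprintf(bf, size, "%-6.6s", "nop");
}

static struct ins_ops nop_ops = { .scnprintf = nop__scnprintf };
static struct ins_ops plain_ops = { .scnprintf = NULL };	/* hypothetical */

int main(void)
{
	/* hypothetical disassembly lines */
	struct ins nop = { .name = "nopw", .ops = &nop_ops };
	struct ins mov = { .name = "mov", .ops = &plain_ops };
	struct ins_operands nop_operands = { .raw = "0x0(%rax,%rax,1)" };
	struct ins_operands mov_operands = { .raw = "%rdi,%rbx" };
	char bf[64];

	ins__scnprintf(&nop, bf, sizeof(bf), &nop_operands);
	printf("%s\n", bf);	/* "nop" */
	ins__scnprintf(&mov, bf, sizeof(bf), &mov_operands);
	printf("%s\n", bf);	/* "mov    %rdi,%rbx" */
	return 0;
}

The browser side then calls disasm_line__scnprintf() once in annotate_browser__write() instead of open-coding the "%-6.6s %s" format in two branches, which is what lets the call, jump and nop formatters differ without touching the caller.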