summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-11 13:22:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-11 13:22:43 -0700
commit4f0ac854167846bd55cd81dbc9a36e03708aa01c (patch)
tree0eb34d18a667f8e68ad9255f791560b028fed2a6 /arch/x86/kernel
parentb9356c53ba2f593081e5aa45eb67adcce243d1c0 (diff)
parent6b58e7f146f8d79c08f62087f928e1f01747de71 (diff)
Merge branch 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (60 commits) perf tools: Avoid unnecessary work in directory lookups perf stat: Clean up statistics calculations a bit more perf stat: More advanced variance computation perf stat: Use stddev_mean in stead of stddev perf stat: Remove the limit on repeat perf stat: Change noise calculation to use stddev x86, perf_counter, bts: Do not allow kernel BTS tracing for now x86, perf_counter, bts: Correct pointer-to-u64 casts x86, perf_counter, bts: Fail if BTS is not available perf_counter: Fix output-sharing error path perf trace: Fix read_string() perf trace: Print out in nanoseconds perf tools: Seek to the end of the header area perf trace: Fix parsing of perf.data perf trace: Sample timestamps as well perf_counter: Introduce new (non-)paranoia level to allow raw tracepoint access perf trace: Sample the CPU too perf tools: Work around strict aliasing related warnings perf tools: Clean up warnings list in the Makefile perf tools: Complete support for dynamic strings ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c329
1 files changed, 323 insertions, 6 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 900332b800f..f9cd0849bd4 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -6,6 +6,7 @@
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
*
* For licencing details see kernel-base/COPYING
*/
@@ -20,6 +21,7 @@
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
+#include <linux/cpu.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -27,12 +29,52 @@
static u64 perf_counter_mask __read_mostly;
+/* The maximal number of PEBS counters: */
+#define MAX_PEBS_COUNTERS 4
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE 24
+
+/* The size of a per-cpu BTS buffer in bytes: */
+#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024)
+
+/* The BTS overflow threshold in bytes from the end of the buffer: */
+#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64)
+
+
+/*
+ * Bits in the debugctlmsr controlling branch tracing.
+ */
+#define X86_DEBUGCTL_TR (1 << 6)
+#define X86_DEBUGCTL_BTS (1 << 7)
+#define X86_DEBUGCTL_BTINT (1 << 8)
+#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
+#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_counter_reset[MAX_PEBS_COUNTERS];
+};
+
struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
+ struct debug_store *ds;
};
/*
@@ -58,6 +100,8 @@ struct x86_pmu {
int apic;
u64 max_period;
u64 intel_ctrl;
+ void (*enable_bts)(u64 config);
+ void (*disable_bts)(void);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter,
u64 prev_raw_count, new_raw_count;
s64 delta;
+ if (idx == X86_PMC_IDX_FIXED_BTS)
+ return 0;
+
/*
* Careful: an NMI might modify the previous counter value.
*
@@ -666,10 +713,110 @@ static void release_pmc_hardware(void)
#endif
}
+static inline bool bts_available(void)
+{
+ return x86_pmu.enable_bts != NULL;
+}
+
+static inline void init_debug_store_on_cpu(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+
+ if (!ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+ (u32)((u64)(unsigned long)ds),
+ (u32)((u64)(unsigned long)ds >> 32));
+}
+
+static inline void fini_debug_store_on_cpu(int cpu)
+{
+ if (!per_cpu(cpu_hw_counters, cpu).ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static void release_bts_hardware(void)
+{
+ int cpu;
+
+ if (!bts_available())
+ return;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu)
+ fini_debug_store_on_cpu(cpu);
+
+ for_each_possible_cpu(cpu) {
+ struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+
+ if (!ds)
+ continue;
+
+ per_cpu(cpu_hw_counters, cpu).ds = NULL;
+
+ kfree((void *)(unsigned long)ds->bts_buffer_base);
+ kfree(ds);
+ }
+
+ put_online_cpus();
+}
+
+static int reserve_bts_hardware(void)
+{
+ int cpu, err = 0;
+
+ if (!bts_available())
+ return 0;
+
+ get_online_cpus();
+
+ for_each_possible_cpu(cpu) {
+ struct debug_store *ds;
+ void *buffer;
+
+ err = -ENOMEM;
+ buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+ if (unlikely(!buffer))
+ break;
+
+ ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+ if (unlikely(!ds)) {
+ kfree(buffer);
+ break;
+ }
+
+ ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ ds->bts_index = ds->bts_buffer_base;
+ ds->bts_absolute_maximum =
+ ds->bts_buffer_base + BTS_BUFFER_SIZE;
+ ds->bts_interrupt_threshold =
+ ds->bts_absolute_maximum - BTS_OVFL_TH;
+
+ per_cpu(cpu_hw_counters, cpu).ds = ds;
+ err = 0;
+ }
+
+ if (err)
+ release_bts_hardware();
+ else {
+ for_each_online_cpu(cpu)
+ init_debug_store_on_cpu(cpu);
+ }
+
+ put_online_cpus();
+
+ return err;
+}
+
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
release_pmc_hardware();
+ release_bts_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
return 0;
}
+static void intel_pmu_enable_bts(u64 config)
+{
+ unsigned long debugctlmsr;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr |= X86_DEBUGCTL_TR;
+ debugctlmsr |= X86_DEBUGCTL_BTS;
+ debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+ debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+ debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+ update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long debugctlmsr;
+
+ if (!cpuc->ds)
+ return;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr &=
+ ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+ X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+ update_debugctlmsr(debugctlmsr);
+}
+
/*
* Setup the hardware configuration for a given attr_type
*/
@@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
err = 0;
if (!atomic_inc_not_zero(&active_counters)) {
mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
- err = -EBUSY;
- else
+ if (atomic_read(&active_counters) == 0) {
+ if (!reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ err = reserve_bts_hardware();
+ }
+ if (!err)
atomic_inc(&active_counters);
mutex_unlock(&pmc_reserve_mutex);
}
@@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
if (config == -1LL)
return -EINVAL;
+ /*
+ * Branch tracing:
+ */
+ if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+ (hwc->sample_period == 1)) {
+ /* BTS is not supported by this architecture. */
+ if (!bts_available())
+ return -EOPNOTSUPP;
+
+ /* BTS is currently only allowed for user-mode. */
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+ return -EOPNOTSUPP;
+ }
+
hwc->config |= config;
return 0;
@@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void)
static void intel_pmu_disable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->enabled = 0;
+ barrier();
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ intel_pmu_disable_bts();
}
static void amd_pmu_disable_all(void)
@@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void)
static void intel_pmu_enable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ return;
+
+ cpuc->enabled = 1;
+ barrier();
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+ if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ struct perf_counter *counter =
+ cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+
+ if (WARN_ON_ONCE(!counter))
+ return;
+
+ intel_pmu_enable_bts(counter->hw.config);
+ }
}
static void amd_pmu_enable_all(void)
@@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
static inline void
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ intel_pmu_disable_bts();
+ return;
+ }
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc, idx);
return;
@@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter,
s64 period = hwc->sample_period;
int err, ret = 0;
+ if (idx == X86_PMC_IDX_FIXED_BTS)
+ return 0;
+
/*
* If we are way outside a reasoable range then just skip forward:
*/
@@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (!__get_cpu_var(cpu_hw_counters).enabled)
+ return;
+
+ intel_pmu_enable_bts(hwc->config);
+ return;
+ }
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc, idx);
return;
@@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
unsigned int event;
+ event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+ if (unlikely((event ==
+ x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ (hwc->sample_period == 1)))
+ return X86_PMC_IDX_FIXED_BTS;
+
if (!x86_pmu.num_counters_fixed)
return -1;
- event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter)
int idx;
idx = fixed_mode_idx(counter, hwc);
- if (idx >= 0) {
+ if (idx == X86_PMC_IDX_FIXED_BTS) {
+ /* BTS is already occupied. */
+ if (test_and_set_bit(idx, cpuc->used_mask))
+ return -EAGAIN;
+
+ hwc->config_base = 0;
+ hwc->counter_base = 0;
+ hwc->idx = idx;
+ } else if (idx >= 0) {
/*
* Try to get the fixed counter, if that is already taken
* then try to get a generic counter:
@@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void)
local_irq_restore(flags);
}
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
+ struct perf_sample_data *data)
+{
+ struct debug_store *ds = cpuc->ds;
+ struct bts_record {
+ u64 from;
+ u64 to;
+ u64 flags;
+ };
+ struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+ unsigned long orig_ip = data->regs->ip;
+ struct bts_record *at, *top;
+
+ if (!counter)
+ return;
+
+ if (!ds)
+ return;
+
+ at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+ top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+ ds->bts_index = ds->bts_buffer_base;
+
+ for (; at < top; at++) {
+ data->regs->ip = at->from;
+ data->addr = at->to;
+
+ perf_counter_output(counter, 1, data);
+ }
+
+ data->regs->ip = orig_ip;
+ data->addr = 0;
+
+ /* There's new data available. */
+ counter->pending_kill = POLL_IN;
+}
+
static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter)
* that we are disabling:
*/
x86_perf_counter_update(counter, hwc, idx);
+
+ /* Drain the remaining BTS records. */
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ struct perf_sample_data data;
+ struct pt_regs regs;
+
+ data.regs = &regs;
+ intel_pmu_drain_bts_buffer(cpuc, &data);
+ }
cpuc->counters[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
@@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter)
static void intel_pmu_reset(void)
{
+ struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
unsigned long flags;
int idx;
@@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void)
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
+ if (ds)
+ ds->bts_index = ds->bts_buffer_base;
local_irq_restore(flags);
}
@@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_counters);
perf_disable();
+ intel_pmu_drain_bts_buffer(cpuc, &data);
status = intel_pmu_get_status();
if (!status) {
perf_enable();
@@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = {
* the generic counter period:
*/
.max_period = (1ULL << 31) - 1,
+ .enable_bts = intel_pmu_enable_bts,
+ .disable_bts = intel_pmu_disable_bts,
};
static struct x86_pmu amd_pmu = {
@@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
+
+void hw_perf_counter_setup_online(int cpu)
+{
+ init_debug_store_on_cpu(cpu);
+}