summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c29
-rw-r--r--arch/x86/kernel/cpu/common.c11
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c570
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c6
10 files changed, 619 insertions, 51 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0a44b90602b..146bb6218ee 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -26,7 +26,8 @@
* contact AMD for precise details and a CPU swap.
*
* See http://www.multimania.com/poulot/k6bug.html
- * http://www.amd.com/K6/k6docs/revgd.html
+ * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
+ * (Publication # 21266 Issue Date: August 1998)
*
* The following test is erm.. interesting. AMD neglected to up
* the chip setting when fixing the bug but they also tweaked some
@@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
"system stability may be impaired when more than 32 MB are used.\n");
else
printk(KERN_CONT "probably OK (after B9730xxxx).\n");
- printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
}
/* K6 with old style WHCR */
@@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
node = per_cpu(cpu_llc_id, cpu);
/*
- * If core numbers are inconsistent, it's likely a multi-fabric platform,
- * so invoke platform-specific handler
+ * On multi-fabric platform (e.g. Numascale NumaChip) a
+ * platform-specific handler needs to be called to fixup some
+ * IDs of the CPU.
*/
- if (c->phys_proc_id != node)
+ if (x86_cpuinit.fixup_cpu_id)
x86_cpuinit.fixup_cpu_id(c, node);
if (!node_online(node)) {
@@ -579,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
+ /* re-enable TopologyExtensions if switched off by BIOS */
+ if ((c->x86 == 0x15) &&
+ (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+ !cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ u64 val;
+
+ if (!rdmsrl_amd_safe(0xc0011005, &val)) {
+ val |= 1ULL << 54;
+ wrmsrl_amd_safe(0xc0011005, val);
+ rdmsrl(0xc0011005, val);
+ if (val & (1ULL << 54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ printk(KERN_INFO FW_INFO "CPU: Re-enabling "
+ "disabled Topology Extensions Support\n");
+ }
+ }
+ }
+
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e258362a3..82f29e70d05 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1163,15 +1163,6 @@ static void dbg_restore_debug_regs(void)
#endif /* ! CONFIG_KGDB */
/*
- * Prints an error where the NUMA and configured core-number mismatch and the
- * platform didn't override this to fix it up
- */
-void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
-{
- pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
-}
-
-/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
@@ -1194,7 +1185,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
- if (cpu != 0 && percpu_read(numa_node) == 0 &&
+ if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 73d08ed98a6..b8f3653dddb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
/* check if @slot is already used or the index is already disabled */
ret = amd_get_l3_disable_slot(nb, slot);
if (ret >= 0)
- return -EINVAL;
+ return -EEXIST;
if (index > nb->l3_cache.indices)
return -EINVAL;
/* check whether the other slot has disabled the same index already */
if (index == amd_get_l3_disable_slot(nb, !slot))
- return -EINVAL;
+ return -EEXIST;
amd_l3_disable_index(nb, cpu, slot, index);
@@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
if (err) {
if (err == -EEXIST)
- printk(KERN_WARNING "L3 disable slot %d in use!\n",
- slot);
+ pr_warning("L3 slot %d in use/index already disabled!\n",
+ slot);
return err;
}
return count;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087..297edb1b1fb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
struct mce m;
int i;
- percpu_inc(mce_poll_count);
+ this_cpu_inc(mce_poll_count);
mce_gather_info(&m, NULL);
@@ -945,9 +945,10 @@ struct mce_info {
atomic_t inuse;
struct task_struct *t;
__u64 paddr;
+ int restartable;
} mce_info[MCE_INFO_MAX];
-static void mce_save_info(__u64 addr)
+static void mce_save_info(__u64 addr, int c)
{
struct mce_info *mi;
@@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr)
if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
mi->t = current;
mi->paddr = addr;
+ mi->restartable = c;
return;
}
}
@@ -1015,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
atomic_inc(&mce_entry);
- percpu_inc(mce_exception_count);
+ this_cpu_inc(mce_exception_count);
if (!banks)
goto out;
@@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
/* schedule action before return to userland */
- mce_save_info(m.addr);
+ mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
set_thread_flag(TIF_MCE_NOTIFY);
} else if (kill_it) {
force_sig(SIGBUS, current);
@@ -1179,7 +1181,13 @@ void mce_notify_process(void)
pr_err("Uncorrected hardware memory error in user-access at %llx",
mi->paddr);
- if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ /*
+ * We must call memory_failure() here even if the current process is
+ * doomed. We still need to mark the page as poisoned and alert any
+ * other users of the page.
+ */
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
+ mi->restartable == 0) {
pr_err("Memory error not recovered");
force_sig(SIGBUS, current);
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e1..e049d6da018 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
- /* mark not used */
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0..65652265fff 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
* when we come here
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (nb->owners[i] == event) {
- cmpxchg(nb->owners+i, event, NULL);
+ if (cmpxchg(nb->owners + i, event, NULL) == event)
break;
- }
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14..da9bcdcd985 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/ptrace.h>
#include <asm/apic.h>
@@ -16,36 +17,591 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+ u64 (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < (s64)min)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * If the hw period that triggers the sw overflow is too short
+ * we might hit the irq handler. This biases the results.
+ * Thus we shorten the next-to-last period and set the last
+ * period to the max period.
+ */
+ if (left > max) {
+ left -= max;
+ if (left > max)
+ left = max;
+ else if (left < min)
+ left = min;
+ }
+
+ *hw_period = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+ int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
+
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+ /*
+ * lower 4 bits can not be set in ibs max cnt,
+ * but allowing it in case we adjust the
+ * sample period to set a frequency.
+ */
+ return -EINVAL;
+ hwc->sample_period &= ~0x0FULL;
+ if (!hwc->sample_period)
+ hwc->sample_period = 0x10;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * If we modify hwc->sample_period, we also need to update
+ * hwc->last_period and hwc->period_left.
+ */
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
return 0;
}
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int overflow;
+
+ /* ignore lower 4 bits in min count: */
+ overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ u64 count = 0;
+
+ if (config & IBS_OP_VAL)
+ count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+ if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ count += (config & IBS_OP_CUR_CNT) >> 32;
+
+ return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 *config)
+{
+ u64 count = perf_ibs->get_count(*config);
+
+ /*
+ * Set width to 64 since we do not overflow on max width but
+ * instead on max count. In perf_ibs_set_period() we clear
+ * prev count manually on overflow.
+ */
+ while (!perf_event_try_update(event, count, 64)) {
+ rdmsrl(event->hw.config_base, *config);
+ count = perf_ibs->get_count(*config);
+ }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ config &= ~perf_ibs->cnt_mask;
+ wrmsrl(hwc->config_base, config);
+ config &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always needs to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 period;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &period);
+ set_bit(IBS_STARTED, pcpu->state);
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
+ int stopping;
+
+ stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
+
+ rdmsrl(hwc->config_base, config);
+
+ if (stopping) {
+ set_bit(IBS_STOPPING, pcpu->state);
+ perf_ibs_disable_event(perf_ibs, hwc, config);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /*
+ * Clear valid bit to not count rollovers on update, rollovers
+ * are only updated in the irq handler.
+ */
+ config &= ~perf_ibs->valid_mask;
+
+ perf_ibs_event_update(perf_ibs, event, &config);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
static int perf_ibs_add(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, PERF_EF_UPDATE);
+
+ pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+
+ .get_count = get_ibs_fetch_count,
};
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+
+ .get_count = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size, check_rip, offset_max, throttle = 0;
+ unsigned int msr;
+ u64 *buf, *config, period;
+
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+ /*
+ * Catch spurious interrupts after stopping IBS: After
+ * disabling IBS there could be still incomming NMIs
+ * with samples that even have the valid bit cleared.
+ * Mark all this NMIs as handled.
+ */
+ return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+ }
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ return 0;
+
+ config = &ibs_data.regs[0];
+ perf_ibs_event_update(perf_ibs, event, config);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+ goto out; /* no sw counter overflow */
+
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+ if (event->attr.sample_type & PERF_SAMPLE_RAW)
+ offset_max = perf_ibs->offset_max;
+ else if (check_rip)
+ offset_max = 2;
+ else
+ offset_max = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < offset_max);
+ ibs_data.size = sizeof(u64) * size;
+
+ regs = *iregs;
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
+ instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.size = sizeof(u32) + ibs_data.size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ throttle = perf_event_overflow(event, &data, &regs);
+out:
+ if (throttle)
+ perf_ibs_disable_event(perf_ibs, hwc, *config);
+ else
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+
+ return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef10..166546ec6ae 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1082,7 +1080,7 @@ again:
if (!intel_pmu_save_and_restart(event))
continue;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7d..5a3edc27f6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
ds->bts_index = ds->bts_buffer_base;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs.ip = 0;
/*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (!intel_pmu_save_and_restart(event))
return;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
/*
* We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd710..47124a73dd7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
handled += overflow;
/* event overflow for sure */
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event))
continue;
+
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}