From e97df76377b8b3b1f7dfd5d6f8a1d5a31438b140 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Feb 2014 20:48:51 +0100 Subject: perf/x86/intel/p6: Add userspace RDPMC quirk for PPro PPro machines can die hard when PCE gets enabled due to a CPU erratum. The safe way it so disable it by default and keep it disabled. See erratum 26 in: http://download.intel.com/design/archives/processors/pro/docs/24268935.pdf Reported-and-Tested-by: Mark Davies Cc: Alan Cox Cc: Stephane Eranian Cc: Vince Weaver Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140206170815.GW2936@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b88645191fe..1246b853c4e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1521,6 +1521,8 @@ static int __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) quirk->func(); @@ -1534,7 +1536,6 @@ static int __init init_hw_perf_events(void) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 0, x86_pmu.num_counters, 0, 0); - x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; if (x86_pmu.event_attrs) @@ -1820,6 +1821,9 @@ static ssize_t set_attr_rdpmc(struct device *cdev, if (ret) return ret; + if (x86_pmu.attr_rdpmc_broken) + return -ENOTSUPP; + if (!!val != !!x86_pmu.attr_rdpmc) { x86_pmu.attr_rdpmc = !!val; smp_call_function(change_rdpmc, (void *)val, 1); -- cgit v1.2.3-70-g09d2 From 0e9f2204cfa6d79abe3e525ddf7c4ab5792cc751 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Feb 2014 11:19:56 +0100 Subject: perf/x86: Fix Userspace RDPMC switch The current code forgets to change the CR4 state on the current CPU. Use on_each_cpu() instead of smp_call_function(). Reported-by: Mark Davies Suggested-by: Mark Davies Signed-off-by: Peter Zijlstra Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/n/tip-69efsat90ibhnd577zy3z9gh@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1246b853c4e..895604f2e91 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1826,7 +1826,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev, if (!!val != !!x86_pmu.attr_rdpmc) { x86_pmu.attr_rdpmc = !!val; - smp_call_function(change_rdpmc, (void *)val, 1); + on_each_cpu(change_rdpmc, (void *)val, 1); } return count; -- cgit v1.2.3-70-g09d2 From 26e61e8939b1fe8729572dabe9a9e97d930dd4f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Feb 2014 16:03:12 +0100 Subject: perf/x86: Fix event scheduling Vince "Super Tester" Weaver reported a new round of syscall fuzzing (Trinity) failures, with perf WARN_ON()s triggering. He also provided traces of the failures. This is I think the relevant bit: > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: x86_pmu_disable > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, n_added: 0, n_txn: 1 > pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926166: collect_events: Adding event: 1 (ffff880119ec8800) So we add the insn:p event (fd[23]). At this point we should have: n_events = 2, n_added = 1, n_txn = 1 > pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: 0 (ffff8800c9e01800) > pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: 4 (ffff8800cbab2c00) We try and add the {BP,cycles,br_insn} group (fd[3], fd[4], fd[15]). These events are 0:cycles and 4:br_insn, the BP event isn't x86_pmu so that's not visible. group_sched_in() pmu->start_txn() /* nop - BP pmu */ event_sched_in() event->pmu->add() So here we should end up with: 0: n_events = 3, n_added = 2, n_txn = 2 4: n_events = 4, n_added = 3, n_txn = 3 But seeing the below state on x86_pmu_enable(), the must have failed, because the 0 and 4 events aren't there anymore. Looking at group_sched_in(), since the BP is the leader, its event_sched_in() must have succeeded, for otherwise we would not have seen the sibling adds. But since neither 0 or 4 are in the below state; their event_sched_in() must have failed; but I don't see why, the complete state: 0,0,1:p,4 fits perfectly fine on a core2. However, since we try and schedule 4 it means the 0 event must have succeeded! Therefore the 4 event must have failed, its failure will have put group_sched_in() into the fail path, which will call: event_sched_out() event->pmu->del() on 0 and the BP event. Now x86_pmu_del() will reduce n_events; but it will not reduce n_added; giving what we see below: n_event = 2, n_added = 2, n_txn = 2 > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: n_events: 2, n_added: 2, n_txn: 2 > pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 config: 1 (ffff880119ec8800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 So the problem is that x86_pmu_del(), when called from a group_sched_in() that fails (for whatever reason), and without x86_pmu TXN support (because the leader is !x86_pmu), will corrupt the n_added state. Reported-and-Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Stephane Eranian Cc: Dave Jones Cc: Link: http://lkml.kernel.org/r/20140221150312.GF3104@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu/perf_event.c') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 895604f2e91..79f9f848bee 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, event); -- cgit v1.2.3-70-g09d2