From 2dbf0116aa8c7bfa900352d3f7b2609748fcc1c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:38 -0700 Subject: perf/x86/intel: Avoid checkpointed counters causing excessive TSX aborts With checkpointed counters there can be a situation where the counter is overflowing, aborts the transaction, is set back to a non overflowing checkpoint, causes interupt. The interrupt doesn't see the overflow because it has been checkpointed. This is then a spurious PMI, typically with a ugly NMI message. It can also lead to excessive aborts. Avoid this problem by: - Using the full counter width for counting counters (earlier patch) - Forbid sampling for checkpointed counters. It's not too useful anyways, checkpointing is mainly for counting. The check is approximate (to still handle KVM), but should catch the majority of cases. - On a PMI always set back checkpointed counters to zero. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9db76c31b3c..57d64b75f15 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1282,6 +1282,11 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } +static inline bool event_is_checkpointed(struct perf_event *event) +{ + return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} + /* * Save and restart an expired event. Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -1289,6 +1294,17 @@ static void intel_pmu_enable_event(struct perf_event *event) int intel_pmu_save_and_restart(struct perf_event *event) { x86_perf_event_update(event); + /* + * For a checkpointed counter always reset back to 0. This + * avoids a situation where the counter overflows, aborts the + * transaction and is then set back to shortly before the + * overflow, and overflows and aborts again. + */ + if (unlikely(event_is_checkpointed(event))) { + /* No race with NMIs because the counter should not be armed */ + wrmsrl(event->hw.event_base, 0); + local64_set(&event->hw.prev_count, 0); + } return x86_perf_event_set_period(event); } @@ -1372,6 +1388,13 @@ again: x86_pmu.drain_pebs(regs); } + /* + * To avoid spurious interrupts with perf stat always reset checkpointed + * counters. + */ + if (cpuc->events[2] && event_is_checkpointed(cpuc->events[2])) + status |= (1ULL << 2); + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; @@ -1837,6 +1860,20 @@ static int hsw_hw_config(struct perf_event *event) event->attr.precise_ip > 0)) return -EOPNOTSUPP; + if (event_is_checkpointed(event)) { + /* + * Sampling of checkpointed events can cause situations where + * the CPU constantly aborts because of a overflow, which is + * then checkpointed back and ignored. Forbid checkpointing + * for sampling. + * + * But still allow a long sampling period, so that perf stat + * from KVM works. + */ + if (event->attr.sample_period > 0 && + event->attr.sample_period < 0x7fffffff) + return -EOPNOTSUPP; + } return 0; } -- cgit v1.2.3-70-g09d2 From 748e86aa90edfddfa6016f1cf383ff5bc6aada91 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:39 -0700 Subject: perf/x86: Report TSX transaction abort cost as weight Use the existing weight reporting facility to report the transaction abort cost, that is the number of cycles wasted in aborts. Haswell reports this in the PEBS record. This was in fact the original user for weight. This is a very useful sort key to concentrate on the most costly aborts and a good metric for TSX tuning. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 55 +++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 63438aad177..104cbba3b59 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -182,16 +182,29 @@ struct pebs_record_nhm { * Same as pebs_record_nhm, with two additional fields. */ struct pebs_record_hsw { - struct pebs_record_nhm nhm; - /* - * Real IP of the event. In the Intel documentation this - * is called eventingrip. - */ - u64 real_ip; - /* - * TSX tuning information field: abort cycles and abort flags. - */ - u64 tsx_tuning; + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; + u64 real_ip; /* the actual eventing ip */ + u64 tsx_tuning; /* TSX abort cycles and flags */ +}; + +union hsw_tsx_tuning { + struct { + u32 cycles_last_block : 32, + hle_abort : 1, + rtm_abort : 1, + instruction_abort : 1, + non_instruction_abort : 1, + retry : 1, + data_conflict : 1, + capacity_writes : 1, + capacity_reads : 1; + }; + u64 value; }; void init_debug_store_on_cpu(int cpu) @@ -785,16 +798,26 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } +static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) +{ + if (pebs->tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + return tsx.cycles_last_block; + } + return 0; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* * We cast to pebs_record_nhm to get the load latency data * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used + * We cast to the biggest PEBS record are careful not + * to access out-of-bounds members. */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct pebs_record_nhm *pebs = __pebs; - struct pebs_record_hsw *pebs_hsw = __pebs; + struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; @@ -853,7 +876,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.sp = pebs->sp; if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs.ip = pebs_hsw->real_ip; + regs.ip = pebs->real_ip; regs.flags |= PERF_EFLAGS_EXACT; } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) regs.flags |= PERF_EFLAGS_EXACT; @@ -864,6 +887,12 @@ static void __intel_pmu_pebs_event(struct perf_event *event, x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; + /* Only set the TSX weight when no memory weight was requested. */ + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && + !fll && + (x86_pmu.intel_cap.pebs_format >= 2)) + data.weight = intel_hsw_weight(pebs); + if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; -- cgit v1.2.3-70-g09d2 From 4b2c4f1f1b71fe18381f089c501ac21cd2167dfa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 Sep 2013 20:37:40 -0700 Subject: perf/x86/intel: Add Haswell TSX event aliases Add TSX event aliases, and export them from the kernel to perf. These are used by perf stat -T and to allow more user friendly access to events. The events are designed to be fairly generic and may also apply to other architectures implementing HTM. They all cover common situations that happens during tuning of transactional code. For Haswell we have to separate the HLE and RTM events, as they are separate in the PMU. This adds the following events: tx-start Count start transaction (used by perf stat -T) tx-commit Count commit of transaction tx-abort Count all aborts tx-conflict Count aborts due to conflict with another CPU. tx-capacity Count capacity aborts (transaction too large) Then matching el-* events for HLE cycles-t Transactional cycles (used by perf stat -T) * also exists on POWER8 cycles-ct Transactional cycles commited (used by perf stat -T) * according to Michael Ellerman POWER8 has a cycles-transactional-committed, * perf stat -T handles both cases Note for useful abort profiling often precise has to be set, as Haswell can only report the point inside the transaction with precise=2. For some classes of aborts, like conflicts, this is not needed, as it makes more sense to look at the complete critical section. This gives a clean set of generalized events to examine transaction success and aborts. Haswell has additional events for TSX, but those are more specialized for very specific situations. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1378438661-24765-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 57d64b75f15..dd1d4f3e18e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2222,7 +2222,34 @@ static __init void intel_nehalem_quirk(void) EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") +/* Haswell special events */ +EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); +EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); +EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); +EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); +EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); +EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); +EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); +EVENT_ATTR_STR(cycles-ct, cycles_ct, + "event=0x3c,in_tx=1,in_tx_cp=1"); + static struct attribute *hsw_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_capacity), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_commit), + EVENT_PTR(el_abort), + EVENT_PTR(el_capacity), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), NULL -- cgit v1.2.3-70-g09d2 From 2b9e344df384e595db24ac61ae5f780e9b024878 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Sep 2013 12:53:44 +0200 Subject: perf/x86/intel: Clean up checkpoint-interrupt bits Clean up the weird CP interrupt exception code by keeping a CP mask. Andi suggested this implementation but weirdly didn't actually implement it himself, do so now because it removes the conditional in the interrupt handler and avoids the assumption its only on cnt2. Suggested-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dvb4q0rydkfp00kqat4p5bah@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 5 +++++ arch/x86/kernel/cpu/perf_event_intel.c | 22 +++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index cc16faae053..ce84edeeae2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -163,6 +163,11 @@ struct cpu_hw_events { u64 intel_ctrl_host_mask; struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; + /* + * Intel checkpoint mask + */ + u64 intel_cp_status; + /* * manage shared (per-core, per-cpu) registers * used on Intel NHM/WSM/SNB diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dd1d4f3e18e..ec70d0cce55 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) wrmsrl(hwc->config_base, ctrl_val); } +static inline bool event_is_checkpointed(struct perf_event *event) +{ + return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; +} + static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + cpuc->intel_cp_status &= ~(1ull << hwc->idx); /* * must disable before any actual event @@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event) if (event->attr.exclude_guest) cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); + if (unlikely(event_is_checkpointed(event))) + cpuc->intel_cp_status |= (1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -1282,11 +1291,6 @@ static void intel_pmu_enable_event(struct perf_event *event) __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } -static inline bool event_is_checkpointed(struct perf_event *event) -{ - return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; -} - /* * Save and restart an expired event. Called by NMI contexts, * so it has to be careful about preempting normal event ops: @@ -1389,11 +1393,11 @@ again: } /* - * To avoid spurious interrupts with perf stat always reset checkpointed - * counters. + * Checkpointed counters can lead to 'spurious' PMIs because the + * rollback caused by the PMI will have cleared the overflow status + * bit. Therefore always force probe these counters. */ - if (cpuc->events[2] && event_is_checkpointed(cpuc->events[2])) - status |= (1ULL << 2); + status |= cpuc->intel_cp_status; for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; -- cgit v1.2.3-70-g09d2 From d2beea4a3419e63804094e9ac4b6d1518bc17a9b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Sep 2013 13:00:47 +0200 Subject: perf/x86/intel: Clean-up/reduce PEBS code Get rid of some pointless duplication introduced by the Haswell code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8q6y4davda9aawwv5yxe7klp@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 94 +++++++++---------------------- 1 file changed, 26 insertions(+), 68 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 104cbba3b59..f364c13ddaa 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -188,8 +188,7 @@ struct pebs_record_hsw { u64 r8, r9, r10, r11; u64 r12, r13, r14, r15; u64 status, dla, dse, lat; - u64 real_ip; /* the actual eventing ip */ - u64 tsx_tuning; /* TSX abort cycles and flags */ + u64 real_ip, tsx_tuning; }; union hsw_tsx_tuning { @@ -811,10 +810,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* - * We cast to pebs_record_nhm to get the load latency data - * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used - * We cast to the biggest PEBS record are careful not - * to access out-of-bounds members. + * We cast to the biggest pebs_record but are careful not to + * unconditionally access the 'extra' entries. */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; @@ -884,12 +881,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && - x86_pmu.intel_cap.pebs_format >= 1) + x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; /* Only set the TSX weight when no memory weight was requested. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && - !fll && + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll && (x86_pmu.intel_cap.pebs_format >= 2)) data.weight = intel_hsw_weight(pebs); @@ -941,17 +937,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at); } -static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, - void *top) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; + void *at, *top; u64 status = 0; - int bit; + int bit, n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; ds->pebs_index = ds->pebs_buffer_base; + n = (top - at) / x86_pmu.pebs_record_size; + if (n <= 0) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; @@ -979,61 +992,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; - - ds->pebs_index = ds->pebs_buffer_base; - - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - -static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_hsw *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; - - n = top - at; - if (n <= 0) - return; - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - /* * BTS, PEBS probe and setup */ @@ -1068,7 +1026,7 @@ void intel_ds_init(void) case 2: pr_cont("PEBS fmt2%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); - x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; default: -- cgit v1.2.3-70-g09d2 From 7f2ee91f54fb56071f97bde1ef7ba7ba0d58dfe5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Sep 2013 19:17:00 +0200 Subject: perf/x86/intel: Clean up EVENT_ATTR_STR() muck Make the code a bit more readable by removing stray whitespaces et al. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-lzEnychz1ylqy8zjenxOmeht@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 35 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ec70d0cce55..353b7a3a258 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); -EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); -EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); +EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); +EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), @@ -2223,23 +2223,22 @@ static __init void intel_nehalem_quirk(void) } } -EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); -EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") +EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") /* Haswell special events */ -EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); -EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); -EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); -EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); -EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); -EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); -EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); -EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); -EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); -EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); -EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); -EVENT_ATTR_STR(cycles-ct, cycles_ct, - "event=0x3c,in_tx=1,in_tx_cp=1"); +EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); +EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); +EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); +EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); +EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); +EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); +EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); +EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); +EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); static struct attribute *hsw_events_attrs[] = { EVENT_PTR(tx_start), -- cgit v1.2.3-70-g09d2 From eb8417aa703eff5ff43d0275f19b0a8e591d818d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Sep 2013 09:23:02 +0200 Subject: perf/x86/intel: Remove division from the intel_pmu_drain_pebs_nhm() hot path Only do the division in case we have to print the result out in a warning. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-43nl31erfbajwpfj254f6zji@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index f364c13ddaa..655d591b57a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -944,7 +944,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct perf_event *event = NULL; void *at, *top; u64 status = 0; - int bit, n; + int bit; if (!x86_pmu.pebs_active) return; @@ -954,16 +954,16 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - n = (top - at) / x86_pmu.pebs_record_size; - if (n <= 0) + if (unlikely(at > top)) return; /* * Should not happen, we program the threshold at 1 and do not * set a reset value. */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); + WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, + "Unexpected number of pebs records %ld\n", + (top - at) / x86_pmu.pebs_record_size); for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; -- cgit v1.2.3-70-g09d2 From 73c4427c6ca3b32fa0441791e9c6eadceff7242f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 17 Sep 2013 14:48:13 +0800 Subject: perf/x86/intel/uncore: Don't use smp_processor_id() in validate_group() uncore_validate_group() can't call smp_processor_id() because it is in preemptible context. Pass NUMA_NO_NODE to the allocator instead. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379400493-11505-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8ed44589b0e..4118f9f6831 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) box->hrtimer.function = uncore_pmu_hrtimer; } -struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) +static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node) { struct intel_uncore_box *box; int i, size; size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); - box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + box = kzalloc_node(size, GFP_KERNEL, node); if (!box) return NULL; @@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; - fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; - box = uncore_alloc_box(type, 0); + box = uncore_alloc_box(type, NUMA_NO_NODE); if (!box) return -ENOMEM; @@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id) if (pmu->func_id < 0) pmu->func_id = j; - box = uncore_alloc_box(type, cpu); + box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From fa7315871046b9a4c48627905691dbde57e51033 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Sep 2013 10:16:42 +0200 Subject: perf: Fix capabilities bitfield compatibility in 'struct perf_event_mmap_page' Solve the problems around the broken definition of perf_event_mmap_page:: cap_usr_time and cap_usr_rdpmc fields which used to overlap, partially fixed by: 860f085b74e9 ("perf: Fix broken union in 'struct perf_event_mmap_page'") The problem with the fix (merged in v3.12-rc1 and not yet released officially), noticed by Vince Weaver is that the new behavior is not detectable by new user-space, and that due to the reuse of the field names it's easy to mis-compile a binary if old headers are used on a new kernel or new headers are used on an old kernel. To solve all that make this change explicit, detectable and self-contained, by iterating the ABI the following way: - Always clear bit 0, and rename it to usrpage->cap_bit0, to at least not confuse old user-space binaries. RDPMC will be marked as unavailable to old binaries but that's within the ABI, this is a capability bit. - Rename bit 1 to ->cap_bit0_is_deprecated and always set it to 1, so new libraries can reliably detect that bit 0 is deprecated and perma-zero without having to check the kernel version. - Use bits 2, 3, 4 for the newly defined, correct functionality: cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ cap_user_time : 1, /* The time_* fields are used */ cap_user_time_zero : 1, /* The time_zero field is used */ - Rename all the bitfield names in perf_event.h to be different from the old names, to make sure it's not possible to mis-compile it accidentally with old assumptions. The 'size' field can then be used in the future to add new fields and it will act as a natural ABI version indicator as well. Also adjust tools/perf/ userspace for the new definitions, noticed by Adrian Hunter. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Also-Fixed-by: Adrian Hunter Link: http://lkml.kernel.org/n/tip-zr03yxjrpXesOzzupszqglbv@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 +++++----- include/uapi/linux/perf_event.h | 14 +++++++++----- kernel/events/core.c | 21 +++++++++++++++++++++ tools/perf/arch/x86/util/tsc.c | 6 +++--- 4 files changed, 38 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8355c84b972..a9c606bb494 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1883,9 +1883,9 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_time_zero = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) @@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) return; - userpg->cap_usr_time = 1; + userpg->cap_user_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_usr_time_zero = 1; + userpg->cap_user_time_zero = 1; userpg->time_zero = this_cpu_read(cyc2ns_offset); } } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7f6d584c267..009a655a5d3 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -380,10 +380,13 @@ struct perf_event_mmap_page { union { __u64 capabilities; struct { - __u64 cap_usr_time : 1, - cap_usr_rdpmc : 1, - cap_usr_time_zero : 1, - cap_____res : 61; + __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ + cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ + + cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ + cap_user_time : 1, /* The time_* fields are used */ + cap_user_time_zero : 1, /* The time_zero field is used */ + cap_____res : 59; }; }; @@ -442,12 +445,13 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[119]; /* align to 1k */ + __u8 __reserved[118*8+4]; /* align to 1k. */ /* * Control data for the mmap() data buffer. diff --git a/kernel/events/core.c b/kernel/events/core.c index dd236b66ca3..cb4238e85b3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } +static void perf_event_init_userpage(struct perf_event *event) +{ + struct perf_event_mmap_page *userpg; + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (!rb) + goto unlock; + + userpg = rb->user_page; + + /* Allow new userspace to detect that bit 0 is deprecated */ + userpg->cap_bit0_is_deprecated = 1; + userpg->size = offsetof(struct perf_event_mmap_page, __reserved); + +unlock: + rcu_read_unlock(); +} + void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -4044,6 +4064,7 @@ again: ring_buffer_attach(event, rb); rcu_assign_pointer(event->rb, rb); + perf_event_init_userpage(event); perf_event_update_userpage(event); unlock: diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 9570c2b0f83..b2519e49424 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) { - bool cap_usr_time_zero; + bool cap_user_time_zero; u32 seq; int i = 0; @@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, tc->time_mult = pc->time_mult; tc->time_shift = pc->time_shift; tc->time_zero = pc->time_zero; - cap_usr_time_zero = pc->cap_usr_time_zero; + cap_user_time_zero = pc->cap_user_time_zero; rmb(); if (pc->lock == seq && !(seq & 1)) break; @@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } } - if (!cap_usr_time_zero) + if (!cap_user_time_zero) return -EOPNOTSUPP; return 0; -- cgit v1.2.3-70-g09d2 From 92519bbc8af612975410def52bd462ca9af85cdb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Sep 2013 13:08:50 +0200 Subject: perf/x86/intel: Fix build warning in intel_pmu_drain_pebs_nhm() Fengguang Wu reported this build warning: arch/x86/kernel/cpu/perf_event_intel_ds.c: In function 'intel_pmu_drain_pebs_nhm': arch/x86/kernel/cpu/perf_event_intel_ds.c:964:2: warning: format '%ld' expects argument of type 'long int', but argument 4 has type 'int' Because pointer arithmetics result type is bitness dependent there's no natural type to use here, cast it to long. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-jbpauwxJqtf24luewcsdFith@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 655d591b57a..54ff6ce519c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -963,7 +963,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) */ WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, "Unexpected number of pebs records %ld\n", - (top - at) / x86_pmu.pebs_record_size); + (long)(top - at) / x86_pmu.pebs_record_size); for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; -- cgit v1.2.3-70-g09d2 From cf3b425dd8d99e01214515a6754f9e69ecc6dce8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 22 Sep 2013 16:19:13 +0800 Subject: perf/x86/intel: Add model number for Avoton Silvermont Signed-off-by: Yan, Zheng Cc: a.p.zijlstra@chello.nl Cc: eranian@google.com Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/1379837953-17755-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9db76c31b3c..f31a1655d1f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void) break; case 55: /* Atom 22nm "Silvermont" */ + case 77: /* Avoton "Silvermont" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, -- cgit v1.2.3-70-g09d2 From c2daa3bed53a81171cf8c1a36db798e82b91afe8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Provide a per-cpu preempt_count implementation Convert x86 to use a per-cpu preemption count. The reason for doing so is that accessing per-cpu variables is a lot cheaper than accessing thread_info variables. We still need to save/restore the actual preemption count due to PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the same cache-line as the other hot __switch_to() variables such as current_task. NOTE: this save/restore is required even for !PREEMPT kernels as cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore task_struct::state. Also rename thread_info::preempt_count to ensure nobody is 'accidentally' still poking at it. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/preempt.h | 98 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/thread_info.h | 5 +- arch/x86/kernel/asm-offsets.c | 1 - arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/entry_32.S | 7 +-- arch/x86/kernel/entry_64.S | 4 +- arch/x86/kernel/irq_32.c | 4 -- arch/x86/kernel/process_32.c | 8 ++++ arch/x86/kernel/process_64.c | 8 ++++ 10 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/preempt.h (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index eca20286a91..7f669853317 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,3 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += preempt.h diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 00000000000..1309942b95e --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,98 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include +#include +#include + +DECLARE_PER_CPU(int, __preempt_count); + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ + __this_cpu_write_4(__preempt_count, pc); +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ + (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ + __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + __this_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ + __this_cpu_add_4(__preempt_count, -val); +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(void) +{ + return unlikely(!__this_cpu_read_4(__preempt_count)); +} + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 27811190cbd..c46a46be1ec 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -28,8 +28,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ + int saved_preempt_count; mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -49,7 +48,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = INIT_PREEMPT_COUNT, \ + .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 28610822fb3..9f6b9341950 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -32,7 +32,6 @@ void common(void) { OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_preempt_count, thread_info, preempt_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2793d1f095a..5223fe6dec7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* @@ -1169,6 +1172,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0dcb0ceb6a..fd1bc1b15e6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -362,12 +362,9 @@ END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all + cmpl $0,PER_CPU_VAR(__preempt_count) + jnz restore_all testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e..6a43e7d29fe 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1118,10 +1118,8 @@ retint_signal: /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ ENTRY(retint_kernel) - cmpl $0,TI_preempt_count(%rcx) + cmpl $0,PER_CPU_VAR(__preempt_count) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4186755f1d7..3fe066359ac 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; - if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -131,7 +128,6 @@ void irq_ctx_init(int cpu) THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); per_cpu(hardirq_ctx, cpu) = irqctx; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 884f98f6935..c2ec1aa6d45 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -291,6 +291,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + /* * Now maybe handle debug registers and/or IO bitmaps */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb1dc51bab0..45ab4d6fc8a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(old_rsp, next->usersp); this_cpu_write(current_task, next_p); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + this_cpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -- cgit v1.2.3-70-g09d2 From 8a3da6c7d0031fcb6a0d17f9c7a68b0e01f52855 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Sep 2013 15:48:48 +0200 Subject: perf/x86: Fix PMU detection printout when no PMU is detected Ran into this cryptic PMU bootup log recently: [ 0.124047] Performance Events: [ 0.125000] smpboot: ... Turns out we print this if no PMU is detected. Fall back to the right condition so that the following is printed: [ 0.122381] Performance Events: no PMU driver, software events only. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/n/tip-u2fwaUffakjp0qkpRfqljgsn@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a9c606bb494..897783b3302 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); -- cgit v1.2.3-70-g09d2 From d8b11a0cbd1c66ce283eb9dabe0498dfa6483f32 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Oct 2013 16:00:14 +0200 Subject: perf/x86: Clean up cap_user_time* setting Currently the cap_user_time_zero capability has different tests than cap_user_time; even though they expose the exact same data. Switch from CONSTANT && NONSTOP to sched_clock_stable to also deal with multi cabinet machines and drop the tsc_disabled() check.. non of this will work sanely without tsc anyway. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-nmgn0j0muo1r4c94vlfh23xy@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 897783b3302..9d8449158cf 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1888,10 +1888,7 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) - return; - - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + if (!sched_clock_stable) return; userpg->cap_user_time = 1; @@ -1899,10 +1896,8 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; - if (sched_clock_stable && !check_tsc_disabled()) { - userpg->cap_user_time_zero = 1; - userpg->time_zero = this_cpu_read(cyc2ns_offset); - } + userpg->cap_user_time_zero = 1; + userpg->time_zero = this_cpu_read(cyc2ns_offset); } /* -- cgit v1.2.3-70-g09d2 From a405bad5ad2086766ce320b16a56952e013327f8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:40 -0700 Subject: perf/x86: Add Haswell specific transaction flag reporting In the PEBS handler report the transaction flags using the new generic transaction flags facility. Most of them come from the "tsx_tuning" field in PEBSv2, but the abort code is derived from the RAX register reported in the PEBS record. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 07d9a052ee7..32e9ed81cd0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -206,6 +206,8 @@ union hsw_tsx_tuning { u64 value; }; +#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL + void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -807,6 +809,16 @@ static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) return 0; } +static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) +{ + u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + + /* For RTM XABORTs also log the abort code from AX */ + if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) + txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + return txn; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { @@ -885,10 +897,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; - /* Only set the TSX weight when no memory weight was requested. */ - if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll && - (x86_pmu.intel_cap.pebs_format >= 2)) - data.weight = intel_hsw_weight(pebs); + if (x86_pmu.intel_cap.pebs_format >= 2) { + /* Only set the TSX weight when no memory weight. */ + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) + data.weight = intel_hsw_weight(pebs); + + if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) + data.txn = intel_hsw_transaction(pebs); + } if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; -- cgit v1.2.3-70-g09d2 From b7af41a1bc255c0098c37a4bcf5c7e5e168ce875 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:44 -0700 Subject: perf/x86: Suppress duplicated abort LBR records Haswell always give an extra LBR record after every TSX abort. Suppress the extra record. This only works when the abort is visible in the LBR If the original abort has already left the 16 LBR entries the extra entry will will stay. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-7-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 1 + arch/x86/kernel/cpu/perf_event_intel_lbr.c | 29 +++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ce84edeeae2..fd00bb29425 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -445,6 +445,7 @@ struct x86_pmu { int lbr_nr; /* hardware stack size */ u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ + bool lbr_double_abort; /* duplicated lbr aborts */ /* * Extra registers for events diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 36b5ab884c1..0fa4f242f05 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2519,6 +2519,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.lbr_double_abort = true; pr_cont("Haswell events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d5be06a5005..90ee6c1d054 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; + int out = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; @@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) } from = (u64)((((s64)from) << skip) >> skip); - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].mispred = mis; - cpuc->lbr_entries[i].predicted = pred; - cpuc->lbr_entries[i].in_tx = in_tx; - cpuc->lbr_entries[i].abort = abort; - cpuc->lbr_entries[i].reserved = 0; + /* + * Some CPUs report duplicated abort records, + * with the second entry not having an abort bit set. + * Skip them here. This loop runs backwards, + * so we need to undo the previous record. + * If the abort just happened outside the window + * the extra entry cannot be removed. + */ + if (abort && x86_pmu.lbr_double_abort && out > 0) + out--; + + cpuc->lbr_entries[out].from = from; + cpuc->lbr_entries[out].to = to; + cpuc->lbr_entries[out].mispred = mis; + cpuc->lbr_entries[out].predicted = pred; + cpuc->lbr_entries[out].in_tx = in_tx; + cpuc->lbr_entries[out].abort = abort; + cpuc->lbr_entries[out].reserved = 0; + out++; } - cpuc->lbr_stack.nr = i; + cpuc->lbr_stack.nr = out; } void intel_pmu_lbr_read(void) -- cgit v1.2.3-70-g09d2 From 9e7827b5ea4ca93b4d864bc07c0fafb838d496b1 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 30 Sep 2013 17:28:52 +0200 Subject: x86, hyperv: Get the local APIC timer frequency from the hypervisor Hyper-V supports a mechanism for retrieving the local APIC frequency. Use this and bypass the calibration code in the kernel . This would allow us to boot the Linux kernel as a "modern VM" on Hyper-V where many of the legacy devices (such as PIT) are not emulated. I would like to thank Olaf Hering , Jan Beulich and H. Peter Anvin for their help in this effort. In this version of the patch, I have addressed Jan's comments. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1380554932-9888-1-git-send-email-olaf@aepfle.de Tested-by: Olaf Hering Signed-off-by: H. Peter Anvin --- arch/x86/include/uapi/asm/hyperv.h | 19 +++++++++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b80420bcd09..b8f1c0176cb 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -27,6 +27,19 @@ #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) + +/* + * There is a single feature flag that signifies the presence of the MSR + * that can be used to retrieve both the local APIC Timer frequency as + * well as the TSC frequency. + */ + +/* Local APIC timer frequency MSR (HV_X64_MSR_APIC_FREQUENCY) is available */ +#define HV_X64_MSR_APIC_FREQUENCY_AVAILABLE (1 << 11) + +/* TSC frequency MSR (HV_X64_MSR_TSC_FREQUENCY) is available */ +#define HV_X64_MSR_TSC_FREQUENCY_AVAILABLE (1 << 11) + /* * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available @@ -136,6 +149,12 @@ /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +/* MSR used to retrieve the TSC frequency */ +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 + +/* MSR used to retrieve the local APIC timer frequency */ +#define HV_X64_MSR_APIC_FREQUENCY 0x40000023 + /* Define the virtual APIC registers */ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 71a39f3621b..0a490ca278e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -67,6 +69,8 @@ static struct clocksource hyperv_cs = { static void __init ms_hyperv_init_platform(void) { + u64 hv_lapic_frequency; + /* * Extract the features and hints */ @@ -76,6 +80,26 @@ static void __init ms_hyperv_init_platform(void) printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); + if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { + /* + * Get the APIC frequency. + */ + rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); + hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); + lapic_timer_frequency = hv_lapic_frequency; + printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", + lapic_timer_frequency); + + /* + * On Hyper-V, when we are booting off an EFI firmware stack, + * we do not have many legacy devices including PIC, PIT etc. + */ + if (efi_enabled(EFI_BOOT)) { + printk(KERN_INFO "HyperV: Using null_legacy_pic\n"); + legacy_pic = &null_legacy_pic; + } + } + if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); } -- cgit v1.2.3-70-g09d2 From 90ab9d5510932e146a9443bf5a591f95d5b5ada8 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 10 Oct 2013 15:30:24 -0700 Subject: x86, hyperv: Correctly guard the local APIC calibration code The code that gets the local APIC timer frequency from the hypervisor rather depends on there being a local APIC. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1381444224-3303-1-git-send-email-kys@microsoft.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0a490ca278e..628ff50ab75 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -80,6 +80,7 @@ static void __init ms_hyperv_init_platform(void) printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); +#ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { /* * Get the APIC frequency. @@ -99,6 +100,7 @@ static void __init ms_hyperv_init_platform(void) legacy_pic = &null_legacy_pic; } } +#endif if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -- cgit v1.2.3-70-g09d2 From 3f79410c7c9c8ef33ccff60c61e1f1166f5ed64a Mon Sep 17 00:00:00 2001 From: Maxime Jayat Date: Sat, 12 Oct 2013 01:29:46 +0200 Subject: treewide: Fix common typo in "identify" Correct common misspelling of "identify" as "indentify" throughout the kernel Signed-off-by: Maxime Jayat Signed-off-by: Jiri Kosina --- arch/mips/include/asm/octeon/cvmx-pip.h | 4 ++-- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- arch/x86/kernel/cpu/scattered.c | 2 +- drivers/media/rc/keymaps/rc-dib0700-nec.c | 2 +- drivers/media/rc/keymaps/rc-dib0700-rc5.c | 2 +- drivers/net/irda/ali-ircc.c | 2 +- drivers/net/irda/nsc-ircc.c | 2 +- drivers/s390/scsi/zfcp_dbf.c | 6 +++--- drivers/scsi/bnx2i/bnx2i_hwi.c | 16 ++++++++-------- drivers/scsi/bnx2i/bnx2i_iscsi.c | 14 +++++++------- drivers/scsi/ncr53c8xx.c | 2 +- drivers/scsi/sym53c8xx_2/sym_glue.h | 2 +- drivers/staging/iio/adc/ad7606.h | 2 +- include/linux/amba/serial.h | 2 +- include/linux/mfd/si476x-core.h | 2 +- include/linux/netdevice.h | 2 +- include/net/bluetooth/l2cap.h | 2 +- net/can/af_can.c | 2 +- net/netfilter/xt_set.c | 4 ++-- 19 files changed, 36 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/mips/include/asm/octeon/cvmx-pip.h b/arch/mips/include/asm/octeon/cvmx-pip.h index a76fe5a57a9..df69bfd2b00 100644 --- a/arch/mips/include/asm/octeon/cvmx-pip.h +++ b/arch/mips/include/asm/octeon/cvmx-pip.h @@ -192,13 +192,13 @@ typedef struct { /* Number of packets processed by PIP */ uint32_t packets; /* - * Number of indentified L2 multicast packets. Does not + * Number of identified L2 multicast packets. Does not * include broadcast packets. Only includes packets whose * parse mode is SKIP_TO_L2 */ uint32_t multicast_packets; /* - * Number of indentified L2 broadcast packets. Does not + * Number of identified L2 broadcast packets. Does not * include multicast packets. Only includes packets whose * parse mode is SKIP_TO_L2 */ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1414c90feab..0641113e296 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,5 +1,5 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to identify caches on Intel CPU. * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index f2cc63e9cf0..b6f794aa169 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -1,5 +1,5 @@ /* - * Routines to indentify additional cpu features that are scattered in + * Routines to identify additional cpu features that are scattered in * cpuid space. */ #include diff --git a/drivers/media/rc/keymaps/rc-dib0700-nec.c b/drivers/media/rc/keymaps/rc-dib0700-nec.c index 4d13a7f2e5c..492a05ade7e 100644 --- a/drivers/media/rc/keymaps/rc-dib0700-nec.c +++ b/drivers/media/rc/keymaps/rc-dib0700-nec.c @@ -5,7 +5,7 @@ * TODO: This table is a real mess, as it merges RC codes from several * devices into a big table. It also has both RC-5 and NEC codes inside. * It should be broken into small tables, and the protocols should properly - * be indentificated. + * be identificated. * * The table were imported from dib0700_devices.c. * diff --git a/drivers/media/rc/keymaps/rc-dib0700-rc5.c b/drivers/media/rc/keymaps/rc-dib0700-rc5.c index ba81d9697cf..454ea596a7e 100644 --- a/drivers/media/rc/keymaps/rc-dib0700-rc5.c +++ b/drivers/media/rc/keymaps/rc-dib0700-rc5.c @@ -5,7 +5,7 @@ * TODO: This table is a real mess, as it merges RC codes from several * devices into a big table. It also has both RC-5 and NEC codes inside. * It should be broken into small tables, and the protocols should properly - * be indentificated. + * be identificated. * * The table were imported from dib0700_devices.c. * diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index 7bbd318bc93..befa45f809c 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -627,7 +627,7 @@ static int ali_ircc_setup(chipio_t *info) /* * Function ali_ircc_read_dongle_id (int index, info) * - * Try to read dongle indentification. This procedure needs to be executed + * Try to read dongle identification. This procedure needs to be executed * once after power-on/reset. It also needs to be used whenever you suspect * that the user may have plugged/unplugged the IrDA Dongle. */ diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index ceeb53737f8..66bc03bdb13 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -1035,7 +1035,7 @@ static int nsc_ircc_setup(chipio_t *info) /* * Function nsc_ircc_read_dongle_id (void) * - * Try to read dongle indentification. This procedure needs to be executed + * Try to read dongle identification. This procedure needs to be executed * once after power-on/reset. It also needs to be used whenever you suspect * that the user may have plugged/unplugged the IrDA Dongle. */ diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 132a905b6bd..0ca64484cfa 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -344,7 +344,7 @@ void zfcp_dbf_san(char *tag, struct zfcp_dbf *dbf, void *data, u8 id, u16 len, /** * zfcp_dbf_san_req - trace event for issued SAN request - * @tag: indentifier for event + * @tag: identifier for event * @fsf_req: request containing issued CT data * d_id: destination ID */ @@ -361,7 +361,7 @@ void zfcp_dbf_san_req(char *tag, struct zfcp_fsf_req *fsf, u32 d_id) /** * zfcp_dbf_san_res - trace event for received SAN request - * @tag: indentifier for event + * @tag: identifier for event * @fsf_req: request containing issued CT data */ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf) @@ -377,7 +377,7 @@ void zfcp_dbf_san_res(char *tag, struct zfcp_fsf_req *fsf) /** * zfcp_dbf_san_in_els - trace event for incoming ELS - * @tag: indentifier for event + * @tag: identifier for event * @fsf_req: request containing issued CT data */ void zfcp_dbf_san_in_els(char *tag, struct zfcp_fsf_req *fsf) diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index af3e675d4d4..886e2f9eb0e 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -126,7 +126,7 @@ static void bnx2i_iscsi_license_error(struct bnx2i_hba *hba, u32 error_code) /** * bnx2i_arm_cq_event_coalescing - arms CQ to enable EQ notification - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * @action: action, ARM or DISARM. For now only ARM_CQE is used * * Arm'ing CQ will enable chip to generate global EQ events inorder to interrupt @@ -756,7 +756,7 @@ void bnx2i_send_cmd_cleanup_req(struct bnx2i_hba *hba, struct bnx2i_cmd *cmd) /** * bnx2i_send_conn_destroy - initiates iscsi connection teardown process * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE to initiate * iscsi connection context clean-up process @@ -791,7 +791,7 @@ int bnx2i_send_conn_destroy(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep) /** * bnx2i_570x_send_conn_ofld_req - initiates iscsi conn context setup process * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * 5706/5708/5709 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE */ @@ -851,7 +851,7 @@ static int bnx2i_570x_send_conn_ofld_req(struct bnx2i_hba *hba, /** * bnx2i_5771x_send_conn_ofld_req - initiates iscsi connection context creation * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * 57710 specific - prepares and posts CONN_OFLD_REQ1/2 KWQE */ @@ -920,7 +920,7 @@ static int bnx2i_5771x_send_conn_ofld_req(struct bnx2i_hba *hba, * bnx2i_send_conn_ofld_req - initiates iscsi connection context setup process * * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * this routine prepares and posts CONN_OFLD_REQ1/2 KWQE */ @@ -939,7 +939,7 @@ int bnx2i_send_conn_ofld_req(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep) /** * setup_qp_page_tables - iscsi QP page table setup function - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * Sets up page tables for SQ/RQ/CQ, 1G/sec (5706/5708/5709) devices requires * 64-bit address in big endian format. Whereas 10G/sec (57710) requires @@ -1046,7 +1046,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) /** * bnx2i_alloc_qp_resc - allocates required resources for QP. * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * Allocate QP (transport layer for iSCSI connection) resources, DMA'able * memory for SQ/RQ/CQ and page tables. EP structure elements such @@ -1191,7 +1191,7 @@ mem_alloc_err: /** * bnx2i_free_qp_resc - free memory resources held by QP * @hba: adapter structure pointer - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * Free QP resources - SQ/RQ/CQ memory and page tables. */ diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index fabeb88602a..854dad7d5b0 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -596,7 +596,7 @@ void bnx2i_drop_session(struct iscsi_cls_session *cls_session) /** * bnx2i_ep_destroy_list_add - add an entry to EP destroy list * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * EP destroy queue manager */ @@ -613,7 +613,7 @@ static int bnx2i_ep_destroy_list_add(struct bnx2i_hba *hba, * bnx2i_ep_destroy_list_del - add an entry to EP destroy list * * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * EP destroy queue manager */ @@ -630,7 +630,7 @@ static int bnx2i_ep_destroy_list_del(struct bnx2i_hba *hba, /** * bnx2i_ep_ofld_list_add - add an entry to ep offload pending list * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * pending conn offload completion queue manager */ @@ -646,7 +646,7 @@ static int bnx2i_ep_ofld_list_add(struct bnx2i_hba *hba, /** * bnx2i_ep_ofld_list_del - add an entry to ep offload pending list * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * pending conn offload completion queue manager */ @@ -721,7 +721,7 @@ bnx2i_find_ep_in_destroy_list(struct bnx2i_hba *hba, u32 iscsi_cid) /** * bnx2i_ep_active_list_add - add an entry to ep active list * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * current active conn queue manager */ @@ -737,7 +737,7 @@ static void bnx2i_ep_active_list_add(struct bnx2i_hba *hba, /** * bnx2i_ep_active_list_del - deletes an entry to ep active list * @hba: pointer to adapter instance - * @ep: pointer to endpoint (transport indentifier) structure + * @ep: pointer to endpoint (transport identifier) structure * * current active conn queue manager */ @@ -1695,7 +1695,7 @@ no_nx2_route: /** * bnx2i_tear_down_conn - tear down iscsi/tcp connection and free resources * @hba: pointer to adapter instance - * @ep: endpoint (transport indentifier) structure + * @ep: endpoint (transport identifier) structure * * destroys cm_sock structure and on chip iscsi context */ diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index 5982a587bab..7d014b11df6 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -1615,7 +1615,7 @@ struct ncb { spinlock_t smp_lock; /* Lock for SMP threading */ /*---------------------------------------------------------------- - ** Chip and controller indentification. + ** Chip and controller identification. **---------------------------------------------------------------- */ int unit; /* Unit number */ diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h index b80bf709f10..805369521df 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.h +++ b/drivers/scsi/sym53c8xx_2/sym_glue.h @@ -174,7 +174,7 @@ struct sym_slcb { */ struct sym_shcb { /* - * Chip and controller indentification. + * Chip and controller identification. */ int unit; char inst_name[16]; diff --git a/drivers/staging/iio/adc/ad7606.h b/drivers/staging/iio/adc/ad7606.h index 9221a74efd1..93c7299e835 100644 --- a/drivers/staging/iio/adc/ad7606.h +++ b/drivers/staging/iio/adc/ad7606.h @@ -42,7 +42,7 @@ struct ad7606_platform_data { /** * struct ad7606_chip_info - chip specifc information - * @name: indentification string for chip + * @name: identification string for chip * @int_vref_mv: the internal reference voltage * @channels: channel specification * @num_channels: number of channels diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 62d9303c283..0ddb5c02ad8 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -40,7 +40,7 @@ #define UART010_LCRL 0x10 /* Line control register, low byte. */ #define UART010_CR 0x14 /* Control register. */ #define UART01x_FR 0x18 /* Flag register (Read only). */ -#define UART010_IIR 0x1C /* Interrupt indentification register (Read). */ +#define UART010_IIR 0x1C /* Interrupt identification register (Read). */ #define UART010_ICR 0x1C /* Interrupt clear register (Write). */ #define ST_UART011_LCRH_RX 0x1C /* Rx line control register. */ #define UART01x_ILPR 0x20 /* IrDA low power counter register. */ diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h index ba89b94e4a5..674b45d5a75 100644 --- a/include/linux/mfd/si476x-core.h +++ b/include/linux/mfd/si476x-core.h @@ -316,7 +316,7 @@ enum si476x_smoothmetrics { * response to 'FM_RD_STATUS' command * @rdstpptyint: Traffic program flag(TP) and/or program type(PTY) * code has changed. - * @rdspiint: Program indentifiaction(PI) code has changed. + * @rdspiint: Program identification(PI) code has changed. * @rdssyncint: RDS synchronization has changed. * @rdsfifoint: RDS was received and the RDS FIFO has at least * 'FM_RDS_INTERRUPT_FIFO_COUNT' elements in it. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8ed4ae94305..9ff50c9a000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1546,7 +1546,7 @@ static inline void *netdev_priv(const struct net_device *dev) #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) /* Set the sysfs device type for the network logical device to allow - * fin grained indentification of different network device types. For + * fine-grained identification of different network device types. For * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 1a966afbbfa..7170b4b434e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -238,7 +238,7 @@ struct l2cap_conn_rsp { #define L2CAP_PSM_SDP 0x0001 #define L2CAP_PSM_RFCOMM 0x0003 -/* channel indentifier */ +/* channel identifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 #define L2CAP_CID_A2MP 0x0003 diff --git a/net/can/af_can.c b/net/can/af_can.c index 3ab8dd2e128..d249874a366 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -420,7 +420,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * @mask: CAN mask (see description) * @func: callback function on filter match * @data: returned parameter for callback function - * @ident: string for calling module indentification + * @ident: string for calling module identification * * Description: * Invokes the callback function with the received sk_buff and the given diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 31790e789e2..4b9d6b4f1eb 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -84,7 +84,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", + pr_warning("Cannot find set identified by id %u to match\n", info->match_set.index); return -ENOENT; } @@ -205,7 +205,7 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", + pr_warning("Cannot find set identified by id %u to match\n", info->match_set.index); return -ENOENT; } -- cgit v1.2.3-70-g09d2 From aa5e5dc2a8878ecf1a94819d889939023fd576c9 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Wed, 18 Sep 2013 06:00:43 +0200 Subject: treewide: fix "distingush" typo Signed-off-by: Michael Opdenacker Signed-off-by: Jiri Kosina --- arch/m32r/include/asm/mmu_context.h | 2 +- arch/mn10300/include/asm/mmu_context.h | 2 +- arch/powerpc/platforms/pseries/nvram.c | 2 +- arch/sh/include/asm/mmu_context.h | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- drivers/input/serio/i8042.c | 2 +- drivers/md/raid5.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index a979a419816..9fc78fc4444 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -45,7 +45,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) Flush all TLB and start new cycle. */ local_flush_tlb_all(); /* Fix version if needed. - Note that we avoid version #0 to distingush NO_CONTEXT. */ + Note that we avoid version #0 to distinguish NO_CONTEXT. */ if (!mc) mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; } diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index c67c2b5365a..75dbe696f83 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -71,7 +71,7 @@ static inline unsigned long allocate_mmu_context(struct mm_struct *mm) local_flush_tlb_all(); /* fix the TLB version if needed (we avoid version #0 so as to - * distingush MMU_NO_CONTEXT) */ + * distinguish MMU_NO_CONTEXT) */ if (!mc) *pmc = mc = MMU_CONTEXT_FIRST_VERSION; } diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index d276cd3edd8..10630eac68e 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -31,7 +31,7 @@ #define NVRW_CNT 0x20 /* - * Set oops header version to distingush between old and new format header. + * Set oops header version to distinguish between old and new format header. * lnx,oops-log partition max size is 4000, header version > 4000 will * help in identifying new header. */ diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 21c5088788d..b9d9489a501 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -81,7 +81,7 @@ static inline void get_mmu_context(struct mm_struct *mm, unsigned int cpu) /* * Fix version; Note that we avoid version #0 - * to distingush NO_CONTEXT. + * to distinguish NO_CONTEXT. */ if (!asid) asid = MMU_CONTEXT_FIRST_VERSION; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 903a264af98..3538a1b0aea 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -339,7 +339,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) #endif /* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. * Assumes number of cores is a power of two. */ static void amd_detect_cmp(struct cpuinfo_x86 *c) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 78e4de42efa..957e387c1cb 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1031,7 +1031,7 @@ static void i8042_controller_reset(bool force_reset) /* * i8042_panic_blink() will turn the keyboard LEDs on or off and is called * when kernel panics. Flashing LEDs is useful for users running X who may - * not see the console and will help distingushing panics from "real" + * not see the console and will help distinguishing panics from "real" * lockups. * * Note that DELAY has a limit of 10ms so we will not get stuck here diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 70c49329ca9..5c9797c7bbe 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -49,7 +49,7 @@ * can't distinguish between a clean block that has been generated * from parity calculations, and a clean block that has been * successfully written to the spare ( or to parity when resyncing). - * To distingush these states we have a stripe bit STRIPE_INSYNC that + * To distinguish these states we have a stripe bit STRIPE_INSYNC that * is set whenever a write is scheduled to the spare, or to the parity * disc if there is no spare. A sync request clears this bit, and * when we find it set with no buffers locked, we know the sync is -- cgit v1.2.3-70-g09d2 From 9536c8d2da8059b00775bd9c5a84816b608cf6f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Oct 2013 12:14:04 +0200 Subject: perf/x86: Optimize intel_pmu_pebs_fixup_ip() There's been reports of high NMI handler overhead, highlighted by such kernel messages: [ 3697.380195] perf samples too long (10009 > 10000), lowering kernel.perf_event_max_sample_rate to 13000 [ 3697.389509] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 9.331 msecs Don Zickus analyzed the source of the overhead and reported: > While there are a few places that are causing latencies, for now I focused on > the longest one first. It seems to be 'copy_user_from_nmi' > > intel_pmu_handle_irq -> > intel_pmu_drain_pebs_nhm -> > __intel_pmu_drain_pebs_nhm -> > __intel_pmu_pebs_event -> > intel_pmu_pebs_fixup_ip -> > copy_from_user_nmi > > In intel_pmu_pebs_fixup_ip(), if the while-loop goes over 50, the sum of > all the copy_from_user_nmi latencies seems to go over 1,000,000 cycles > (there are some cases where only 10 iterations are needed to go that high > too, but in generall over 50 or so). At this point copy_user_from_nmi > seems to account for over 90% of the nmi latency. The solution to that is to avoid having to call copy_from_user_nmi() for every instruction. Since we already limit the max basic block size, we can easily pre-allocate a piece of memory to copy the entire thing into in one go. Don reported this test result: > Your patch made a huge difference in improvement. The > copy_from_user_nmi() no longer hits the million of cycles. I still > have a batch of 100,000-300,000 cycles. My longest NMI paths used > to be dominated by copy_from_user_nmi, now it is not (I have to dig > up the new hot path). Reported-and-tested-by: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131016105755.GX10651@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 52 ++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 32e9ed81cd0..c1760ff3c75 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -12,6 +12,7 @@ #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_BUFFER_SIZE PAGE_SIZE +#define PEBS_FIXUP_SIZE PAGE_SIZE /* * pebs_record_32 for p4 and core not supported @@ -228,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu) wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); } +static DEFINE_PER_CPU(void *, insn_buffer); + static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; int node = cpu_to_node(cpu); int max, thresh = 1; /* always use a single PEBS record */ - void *buffer; + void *buffer, *ibuffer; if (!x86_pmu.pebs) return 0; @@ -242,6 +245,19 @@ static int alloc_pebs_buffer(int cpu) if (unlikely(!buffer)) return -ENOMEM; + /* + * HSW+ already provides us the eventing ip; no need to allocate this + * buffer then. + */ + if (x86_pmu.intel_cap.pebs_format < 2) { + ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!ibuffer) { + kfree(buffer); + return -ENOMEM; + } + per_cpu(insn_buffer, cpu) = ibuffer; + } + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; @@ -262,6 +278,9 @@ static void release_pebs_buffer(int cpu) if (!ds || !x86_pmu.pebs) return; + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; + kfree((void *)(unsigned long)ds->pebs_buffer_base); ds->pebs_buffer_base = 0; } @@ -729,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; int is_64bit = 0; + void *kaddr; /* * We don't need to fixup if the PEBS assist is fault like @@ -752,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) * unsigned math, either ip is before the start (impossible) or * the basic block is larger than 1 page (sanity) */ - if ((ip - to) > PAGE_SIZE) + if ((ip - to) > PEBS_FIXUP_SIZE) return 0; /* @@ -763,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 1; } + if (!kernel_ip(ip)) { + int size, bytes; + u8 *buf = this_cpu_read(insn_buffer); + + size = ip - to; /* Must fit our buffer, see above */ + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != size) + return 0; + + kaddr = buf; + } else { + kaddr = (void *)to; + } + do { struct insn insn; - u8 buf[MAX_INSN_SIZE]; - void *kaddr; old_to = to; - if (!kernel_ip(ip)) { - int bytes, size = MAX_INSN_SIZE; - - bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) - return 0; - - kaddr = buf; - } else - kaddr = (void *)to; #ifdef CONFIG_X86_64 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); #endif insn_init(&insn, kaddr, is_64bit); insn_get_length(&insn); + to += insn.length; + kaddr += insn.length; } while (to < ip); if (to == ip) { -- cgit v1.2.3-70-g09d2 From 147de14772ed897727dba7353916b02d1e0f17f4 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:30:13 -0700 Subject: ACPI, APEI, CPER: Add UEFI 2.4 support for memory error In latest UEFI spec(by now it is 2.4) memory error definition for CPER (UEFI 2.4 Appendix N Common Platform Error Record) adds some new fields. These fields help people to locate memory error to an actual DIMM location. Original-author: Tony Luck Signed-off-by: Chen, Gong Reviewed-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Acked-by: Naveen N. Rao Signed-off-by: Tony Luck --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 3 +-- drivers/acpi/apei/cper.c | 7 ++++--- drivers/acpi/apei/ghes.c | 4 ++-- drivers/edac/ghes_edac.c | 5 ++--- include/linux/cper.h | 11 +++++++++-- 5 files changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index cd8b166a173..de8b60a53f6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) struct mce m; /* Only corrected MC is reported */ - if (!corrected || !(mem_err->validation_bits & - CPER_MEM_VALID_PHYSICAL_ADDRESS)) + if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA)) return; mce_setup(&m); diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index eb5f6d6d7db..946ef520186 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -8,7 +8,7 @@ * various tables, such as ERST, BERT and HEST etc. * * For more information about CPER, please refer to Appendix N of UEFI - * Specification version 2.3. + * Specification version 2.4. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version @@ -191,16 +191,17 @@ static const char *cper_mem_err_type_strs[] = { "memory sparing", "scrub corrected error", "scrub uncorrected error", + "physical memory map-out event", }; static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); - if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) + if (mem->validation_bits & CPER_MEM_VALID_PA) printk("%s""physical_address: 0x%016llx\n", pfx, mem->physical_addr); - if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) printk("%s""physical_address_mask: 0x%016llx\n", pfx, mem->physical_addr_mask); if (mem->validation_bits & CPER_MEM_VALID_NODE) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0db6e4ff650..a30bc313787 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -419,7 +419,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev) if (sec_sev == GHES_SEV_CORRECTED && (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && - (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) { + (mem_err->validation_bits & CPER_MEM_VALID_PA)) { pfn = mem_err->physical_addr >> PAGE_SHIFT; if (pfn_valid(pfn)) memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); @@ -430,7 +430,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev) } if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE && - mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + mem_err->validation_bits & CPER_MEM_VALID_PA) { pfn = mem_err->physical_addr >> PAGE_SHIFT; memory_failure_queue(pfn, 0, 0); } diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index bb534670ec0..0ad797b9db6 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -297,15 +297,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, } /* Error address */ - if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + if (mem_err->validation_bits & CPER_MEM_VALID_PA) { e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; } /* Error grain */ - if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); - } /* Memory error location, mapped on e->location */ p = e->location; diff --git a/include/linux/cper.h b/include/linux/cper.h index 09ebe211364..2fc0ec3d89c 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -218,8 +218,8 @@ enum { #define CPER_PROC_VALID_IP 0x1000 #define CPER_MEM_VALID_ERROR_STATUS 0x0001 -#define CPER_MEM_VALID_PHYSICAL_ADDRESS 0x0002 -#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK 0x0004 +#define CPER_MEM_VALID_PA 0x0002 +#define CPER_MEM_VALID_PA_MASK 0x0004 #define CPER_MEM_VALID_NODE 0x0008 #define CPER_MEM_VALID_CARD 0x0010 #define CPER_MEM_VALID_MODULE 0x0020 @@ -232,6 +232,9 @@ enum { #define CPER_MEM_VALID_RESPONDER_ID 0x1000 #define CPER_MEM_VALID_TARGET_ID 0x2000 #define CPER_MEM_VALID_ERROR_TYPE 0x4000 +#define CPER_MEM_VALID_RANK_NUMBER 0x8000 +#define CPER_MEM_VALID_CARD_HANDLE 0x10000 +#define CPER_MEM_VALID_MODULE_HANDLE 0x20000 #define CPER_PCIE_VALID_PORT_TYPE 0x0001 #define CPER_PCIE_VALID_VERSION 0x0002 @@ -347,6 +350,10 @@ struct cper_sec_mem_err { __u64 responder_id; __u64 target_id; __u8 error_type; + __u8 reserved; + __u16 rank; + __u16 mem_array_handle; /* card handle in UEFI 2.4 */ + __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; struct cper_sec_pcie { -- cgit v1.2.3-70-g09d2 From 09dc68d958c67c76cf672ec78b7391af453010f8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 21 Oct 2013 09:35:20 +0100 Subject: x86/cpu: Track legacy CPU model data only on 32-bit kernels struct cpu_dev's c_models is only ever set inside CONFIG_X86_32 conditionals (or code that's being built for 32-bit only), so there's no use of reserving the (empty) space for the model names in a 64-bit kernel. Similarly, c_size_cache is only used in the #else of a CONFIG_X86_64 conditional, so reserving space for (and in one case even initializing) that field is pointless for 64-bit kernels too. While moving both fields to the end of the structure, I also noticed that: - the c_models array size was one too small, potentially causing table_lookup_model() to return garbage on Intel CPUs (intel.c's instance was lacking the sentinel with family being zero), so the patch bumps that by one, - c_models' vendor sub-field was unused (and anyway redundant with the base structure's c_x86_vendor field), so the patch deletes it. Also rename the legacy fields so that their legacy nature stands out and comment their declarations. Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/5265036802000078000FC4DB@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 6 +++--- arch/x86/kernel/cpu/centaur.c | 8 +++++--- arch/x86/kernel/cpu/common.c | 12 +++++++----- arch/x86/kernel/cpu/cpu.h | 20 +++++++++++--------- arch/x86/kernel/cpu/intel.c | 12 ++++++------ arch/x86/kernel/cpu/umc.c | 4 ++-- 6 files changed, 34 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 903a264af98..3daece79a14 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -823,8 +823,8 @@ static const struct cpu_dev amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, #ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + .legacy_models = { + { .family = 4, .model_names = { [3] = "486 DX/2", [7] = "486 DX/2-WB", @@ -835,7 +835,7 @@ static const struct cpu_dev amd_cpu_dev = { } }, }, - .c_size_cache = amd_size_cache, + .legacy_cache_size = amd_size_cache, #endif .c_early_init = early_init_amd, .c_detect_tlb = cpu_detect_tlb_amd, diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index fbf6c3bc240..8d5652dc99d 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -468,10 +468,10 @@ static void init_centaur(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_32 static unsigned int centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) { -#ifdef CONFIG_X86_32 /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) size >>= 8; @@ -484,16 +484,18 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) if ((c->x86 == 6) && (c->x86_model == 9) && (c->x86_mask == 1) && (size == 65)) size -= 1; -#endif return size; } +#endif static const struct cpu_dev centaur_cpu_dev = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, - .c_size_cache = centaur_size_cache, +#ifdef CONFIG_X86_32 + .legacy_cache_size = centaur_size_cache, +#endif .c_x86_vendor = X86_VENDOR_CENTAUR, }; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2793d1f095a..9ada0b37ae0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -346,7 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) /* Look up CPU names by table lookup. */ static const char *table_lookup_model(struct cpuinfo_x86 *c) { - const struct cpu_model_info *info; +#ifdef CONFIG_X86_32 + const struct legacy_cpu_model_info *info; if (c->x86_model >= 16) return NULL; /* Range check */ @@ -354,13 +355,14 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) if (!this_cpu) return NULL; - info = this_cpu->c_models; + info = this_cpu->legacy_models; - while (info && info->family) { + while (info->family) { if (info->family == c->x86) return info->model_names[c->x86_model]; info++; } +#endif return NULL; /* Not found */ } @@ -450,8 +452,8 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); #else /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); + if (this_cpu->legacy_cache_size) + l2size = this_cpu->legacy_cache_size(c, l2size); /* Allow user to override all this if necessary. */ if (cachesize_override != -1) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4041c24ae7d..c37dc37e831 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,12 +1,6 @@ #ifndef ARCH_X86_CPU_H #define ARCH_X86_CPU_H -struct cpu_model_info { - int vendor; - int family; - const char *model_names[16]; -}; - /* attempt to consolidate cpu attributes */ struct cpu_dev { const char *c_vendor; @@ -14,15 +8,23 @@ struct cpu_dev { /* some have two possibilities for cpuid string */ const char *c_ident[2]; - struct cpu_model_info c_models[4]; - void (*c_early_init)(struct cpuinfo_x86 *); void (*c_bsp_init)(struct cpuinfo_x86 *); void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); int c_x86_vendor; +#ifdef CONFIG_X86_32 + /* Optional vendor specific routine to obtain the cache size. */ + unsigned int (*legacy_cache_size)(struct cpuinfo_x86 *, + unsigned int); + + /* Family/stepping-based lookup table for model names. */ + struct legacy_cpu_model_info { + int family; + const char *model_names[16]; + } legacy_models[5]; +#endif }; struct _tlb_table { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ec7299566f7..dc1ec0dff93 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -665,8 +665,8 @@ static const struct cpu_dev intel_cpu_dev = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, #ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + .legacy_models = { + { .family = 4, .model_names = { [0] = "486 DX-25/33", [1] = "486 DX-50", @@ -679,7 +679,7 @@ static const struct cpu_dev intel_cpu_dev = { [9] = "486 DX/4-WB" } }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { .family = 5, .model_names = { [0] = "Pentium 60/66 A-step", [1] = "Pentium 60/66", @@ -690,7 +690,7 @@ static const struct cpu_dev intel_cpu_dev = { [8] = "Mobile Pentium MMX" } }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { .family = 6, .model_names = { [0] = "Pentium Pro A-step", [1] = "Pentium Pro", @@ -704,7 +704,7 @@ static const struct cpu_dev intel_cpu_dev = { [11] = "Pentium III (Tualatin)", } }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { .family = 15, .model_names = { [0] = "Pentium 4 (Unknown)", [1] = "Pentium 4 (Willamette)", @@ -714,7 +714,7 @@ static const struct cpu_dev intel_cpu_dev = { } }, }, - .c_size_cache = intel_size_cache, + .legacy_cache_size = intel_size_cache, #endif .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index 202759a1412..75c5ad5d35c 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -11,8 +11,8 @@ static const struct cpu_dev umc_cpu_dev = { .c_vendor = "UMC", .c_ident = { "UMC UMC UMC" }, - .c_models = { - { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = + .legacy_models = { + { .family = 4, .model_names = { [1] = "U5D", [2] = "U5S", -- cgit v1.2.3-70-g09d2 From e8a923cc1fff6e627f906655ad52ee694ef2f6d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Oct 2013 15:32:10 +0200 Subject: perf/x86: Fix NMI measurements OK, so what I'm actually seeing on my WSM is that sched/clock.c is 'broken' for the purpose we're using it for. What triggered it is that my WSM-EP is broken :-( [ 0.001000] tsc: Fast TSC calibration using PIT [ 0.002000] tsc: Detected 2533.715 MHz processor [ 0.500180] TSC synchronization [CPU#0 -> CPU#6]: [ 0.505197] Measured 3 cycles TSC warp between CPUs, turning off TSC clock. [ 0.004000] tsc: Marking TSC unstable due to check_tsc_sync_source failed For some reason it consistently detects TSC skew, even though NHM+ should have a single clock domain for 'reasonable' systems. This marks sched_clock_stable=0, which means that we do fancy stuff to try and get a 'sane' clock. Part of this fancy stuff relies on the tick, clearly that's gone when NOHZ=y. So for idle cpus time gets stuck, until it either wakes up or gets kicked by another cpu. While this is perfectly fine for the scheduler -- it only cares about actually running stuff, and when we're running stuff we're obviously not idle. This does somewhat break down for perf which can trigger events just fine on an otherwise idle cpu. So I've got NMIs get get 'measured' as taking ~1ms, which actually don't last nearly that long: -0 [013] d.h. 886.311970: rcu_nmi_enter <-do_nmi ... -0 [013] d.h. 886.311997: perf_sample_event_took: HERE!!! : 1040990 So ftrace (which uses sched_clock(), not the fancy bits) only sees ~27us, but we measure ~1ms !! Now since all this measurement stuff lives in x86 code, we can actually fix it. Signed-off-by: Peter Zijlstra Cc: mingo@kernel.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Link: http://lkml.kernel.org/r/20131017133350.GG3364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- arch/x86/kernel/nmi.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d8449158cf..8a87a322412 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - int ret; u64 start_clock; u64 finish_clock; + int ret; if (!atomic_read(&active_events)) return NMI_DONE; - start_clock = local_clock(); + start_clock = sched_clock(); ret = x86_pmu.handle_irq(regs); - finish_clock = local_clock(); + finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc2c35..6fcb49ce50a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 u64 before, delta, whole_msecs; int remainder_ns, decimal_msecs, thishandled; - before = local_clock(); + before = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = local_clock() - before; + delta = sched_clock() - before; trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) -- cgit v1.2.3-70-g09d2 From d68ce0177c1e51fb332369e0c99852a6d05668af Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 11 Oct 2013 16:07:31 -0700 Subject: x86, hyperv: Fix build error due to missing include 9e7827b5ea4c ("x86, hyperv: Get the local APIC timer frequency from the hypervisor") breaks the build with some configs because apic.h isn't directly included: arch/x86/kernel/cpu/mshyperv.c: In function 'ms_hyperv_init_platform': arch/x86/kernel/cpu/mshyperv.c:90:3: error: 'lapic_timer_frequency' undeclared (first use in this function) arch/x86/kernel/cpu/mshyperv.c:90:3: note: each undeclared identifier is reported only once for each function it appears in Fix it by including asm/apic.h. Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1310111604160.31170@chino.kir.corp.google.com Acked-by: K. Y. Srinivasan Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 628ff50ab75..9f6e9f89d9d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -25,6 +25,7 @@ #include #include #include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -- cgit v1.2.3-70-g09d2 From a477c8594bee3bff639739c48258a8c737ab721e Mon Sep 17 00:00:00 2001 From: HATAYAMA Daisuke Date: Tue, 5 Nov 2013 02:15:48 +0900 Subject: x86/cpu: Always print SMP information in /proc/cpuinfo Currently show_cpuinfo_core() displays cpu core information only if the number of threads per a whole cores is 2 or larger. However, this condition doesn't care about the number of sockets. For example, this condition doesn't hold on systems with two logical cpus consisting of two sockets and a single core on each socket - yet the topology information would be interesting to see in that case as well. I don't know whether or not there are processors in real world by which such configurations are possible, but at least on vitual machine environments, such configuration can occur, typically when no explicit SMP information is provided in advance. For example, on qemu/KVM, SMP information is specified via -smp command-line option, more specifically, its syntax is: -smp n[,cores=cores][,threads=threads][,sockets=sockets][,maxcpus=maxcpus] If this is not specified, qemu tells configuration with n-sockets, 1-core and 1-thread to the guest machine, on which guest, MP information is not displayed in /proc/cpuinfo. I saw this situation on VMWare guest environment, too. To fix this issue, this patch simply removes the condition because this information is useful even if there's only 1 thread. Signed-off-by: HATAYAMA Daisuke Cc: Vivek Goyal Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5277D644.4090707@jp.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/proc.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index aee6317b902..06fe3ed8b85 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -11,15 +11,12 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, unsigned int cpu) { #ifdef CONFIG_SMP - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpumask_weight(cpu_core_mask(cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - seq_printf(m, "apicid\t\t: %d\n", c->apicid); - seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); - } + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", cpumask_weight(cpu_core_mask(cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + seq_printf(m, "apicid\t\t: %d\n", c->apicid); + seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); #endif } -- cgit v1.2.3-70-g09d2 From 0a196848ca365ec582c6d86659be456be6d4ed96 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Oct 2013 21:16:22 +0100 Subject: perf: Fix arch_perf_out_copy_user default The arch_perf_output_copy_user() default of __copy_from_user_inatomic() returns bytes not copied, while all other argument functions given DEFINE_OUTPUT_COPY() return bytes copied. Since copy_from_user_nmi() is the odd duck out by returning bytes copied where all other *copy_{to,from}* functions return bytes not copied, change it over and ammend DEFINE_OUTPUT_COPY() to expect bytes not copied. Oddly enough DEFINE_OUTPUT_COPY() already returned bytes not copied while expecting its worker functions to return bytes copied. Signed-off-by: Peter Zijlstra Acked-by: will.deacon@arm.com Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20131030201622.GR16117@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 +- arch/x86/lib/usercopy.c | 2 +- arch/x86/oprofile/backtrace.c | 4 ++-- kernel/events/internal.h | 35 ++++++++++++++++++++++-------- 6 files changed, 33 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8a87a322412..8e132931614 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1989,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) @@ -2041,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index c1760ff3c75..ae96cfa5edd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -789,7 +789,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) size = ip - to; /* Must fit our buffer, see above */ bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) + if (bytes != 0) return 0; kaddr = buf; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 90ee6c1d054..d82d155aca8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -491,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) /* may fail if text not present */ bytes = copy_from_user_nmi(buf, (void __user *)from, size); - if (bytes != size) + if (bytes != 0) return X86_BR_NONE; addr = buf; diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 5465b861394..ddf9ecb53cc 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -31,6 +31,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) ret = __copy_from_user_inatomic(to, from, n); pagefault_enable(); - return n - ret; + return ret; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index d6aa6e8315d..5d04be5efb6 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head) unsigned long bytes; bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + if (bytes != 0) return NULL; fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); @@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head) unsigned long bytes; bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != sizeof(bufhead)) + if (bytes != 0) return NULL; oprofile_add_trace(bufhead[0].return_address); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ca6599723be..569b218782a 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) } #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ -static inline unsigned int \ +static inline unsigned long \ func_name(struct perf_output_handle *handle, \ - const void *buf, unsigned int len) \ + const void *buf, unsigned long len) \ { \ unsigned long size, written; \ \ do { \ - size = min_t(unsigned long, handle->size, len); \ - \ + size = min(handle->size, len); \ written = memcpy_func(handle->addr, buf, size); \ + written = size - written; \ \ len -= written; \ handle->addr += written; \ @@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle, \ return len; \ } -static inline int memcpy_common(void *dst, const void *src, size_t n) +static inline unsigned long +memcpy_common(void *dst, const void *src, unsigned long n) { memcpy(dst, src, n); - return n; + return 0; } DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) -#define MEMCPY_SKIP(dst, src, n) (n) +static inline unsigned long +memcpy_skip(void *dst, const void *src, unsigned long n) +{ + return 0; +} -DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) +DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip) #ifndef arch_perf_out_copy_user -#define arch_perf_out_copy_user __copy_from_user_inatomic +#define arch_perf_out_copy_user arch_perf_out_copy_user + +static inline unsigned long +arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) +{ + unsigned long ret; + + pagefault_disable(); + ret = __copy_from_user_inatomic(dst, src, n); + pagefault_enable(); + + return ret; +} #endif DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) -- cgit v1.2.3-70-g09d2 From d1e8f4a8365d16fd179cb8cad5b4d20aafd0a695 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 31 Oct 2013 13:36:54 +0800 Subject: perf/x86/intel/uncore: Add filter support for IvyBridge-EP QPI boxes The encoding for filter registers of IvyBridge-EP uncore QPI boxes is completely the same as SandyBridge-EP. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: "Yan Zheng" Link: http://lkml.kernel.org/r/1383197815-17706-1-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 61 ++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4118f9f6831..6da399943ac 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1099,6 +1099,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = { &format_attr_umask.attr, &format_attr_edge.attr, &format_attr_thresh8.attr, + &format_attr_match_rds.attr, + &format_attr_match_rnid30.attr, + &format_attr_match_rnid4.attr, + &format_attr_match_dnid.attr, + &format_attr_match_mc.attr, + &format_attr_match_opc.attr, + &format_attr_match_vnw.attr, + &format_attr_match0.attr, + &format_attr_match1.attr, + &format_attr_mask_rds.attr, + &format_attr_mask_rnid30.attr, + &format_attr_mask_rnid4.attr, + &format_attr_mask_dnid.attr, + &format_attr_mask_mc.attr, + &format_attr_mask_opc.attr, + &format_attr_mask_vnw.attr, + &format_attr_mask0.attr, + &format_attr_mask1.attr, NULL, }; @@ -1312,17 +1330,30 @@ static struct intel_uncore_type ivt_uncore_imc = { IVT_UNCORE_PCI_COMMON_INIT(), }; +static struct intel_uncore_ops ivt_uncore_qpi_ops = { + .init_box = ivt_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_qpi_enable_event, + .read_counter = snbep_uncore_pci_read_counter, + .hw_config = snbep_qpi_hw_config, + .get_constraint = uncore_get_constraint, + .put_constraint = uncore_put_constraint, +}; + static struct intel_uncore_type ivt_uncore_qpi = { - .name = "qpi", - .num_counters = 4, - .num_boxes = 3, - .perf_ctr_bits = 48, - .perf_ctr = SNBEP_PCI_PMON_CTR0, - .event_ctl = SNBEP_PCI_PMON_CTL0, - .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, - .box_ctl = SNBEP_PCI_PMON_BOX_CTL, - .ops = &ivt_uncore_pci_ops, - .format_group = &ivt_uncore_qpi_format_group, + .name = "qpi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = SNBEP_PCI_PMON_CTR0, + .event_ctl = SNBEP_PCI_PMON_CTL0, + .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &ivt_uncore_qpi_ops, + .format_group = &ivt_uncore_qpi_format_group, }; static struct intel_uncore_type ivt_uncore_r2pcie = { @@ -1429,6 +1460,16 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), + }, + { /* QPI Port 0 filter */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), + }, { /* end: all zeroes */ } }; -- cgit v1.2.3-70-g09d2 From f891d8cfb8372eb9cfe9d0d4ca61c75bafeaae37 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 31 Oct 2013 13:36:55 +0800 Subject: perf/x86/intel: Add Ivy Bridge-EP uncore IRP box support Unlike other uncore boxes, IRP boxes live in PCI buses with no UBOX device. For PCI bus without UBOX device, we find the next bus that has UBOX device and use its 'bus to socket' mapping. Besides the counter/control registers in IRP boxes are not properly aligned. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Cc: eranian@google.com Cc: "Yan Zheng" Link: http://lkml.kernel.org/r/1383197815-17706-2-git-send-email-zheng.z.yan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 73 +++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 6da399943ac..29c248799ce 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -997,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid) } } + if (!err) { + /* + * For PCI bus with no UBOX device, find the next bus + * that has UBOX device and use its mapping. + */ + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (pcibus_to_physid[bus] >= 0) + i = pcibus_to_physid[bus]; + else + pcibus_to_physid[bus] = i; + } + } + if (ubox_dev) pci_dev_put(ubox_dev); @@ -1330,6 +1344,59 @@ static struct intel_uncore_type ivt_uncore_imc = { IVT_UNCORE_PCI_COMMON_INIT(), }; +/* registers in IRP boxes are not properly aligned */ +static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; +static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; + +static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], + hwc->config | SNBEP_PMON_CTL_EN); +} + +static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + + pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config); +} + +static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + +static struct intel_uncore_ops ivt_uncore_irp_ops = { + .init_box = ivt_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = ivt_uncore_irp_disable_event, + .enable_event = ivt_uncore_irp_enable_event, + .read_counter = ivt_uncore_irp_read_counter, +}; + +static struct intel_uncore_type ivt_uncore_irp = { + .name = "irp", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .event_mask = IVT_PMON_RAW_EVENT_MASK, + .box_ctl = SNBEP_PCI_PMON_BOX_CTL, + .ops = &ivt_uncore_irp_ops, + .format_group = &ivt_uncore_format_group, +}; + static struct intel_uncore_ops ivt_uncore_qpi_ops = { .init_box = ivt_uncore_pci_init_box, .disable_box = snbep_uncore_pci_disable_box, @@ -1377,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = { enum { IVT_PCI_UNCORE_HA, IVT_PCI_UNCORE_IMC, + IVT_PCI_UNCORE_IRP, IVT_PCI_UNCORE_QPI, IVT_PCI_UNCORE_R2PCIE, IVT_PCI_UNCORE_R3QPI, @@ -1385,6 +1453,7 @@ enum { static struct intel_uncore_type *ivt_pci_uncores[] = { [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, + [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp, [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, @@ -1432,6 +1501,10 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), }, + { /* IRP */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), + .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0), + }, { /* QPI0 Port 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), -- cgit v1.2.3-70-g09d2 From 4c08edd305019061bf1ac95ce089497bdbb8b8ac Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 6 Nov 2013 10:00:05 -0800 Subject: x86, hyperv: Move a variable to avoid an unused variable warning The variable hv_lapic_frequency causes an unused variable warning if CONFIG_X86_LOCAL_APIC is disabled. Since the variable is only used inside a small if statement, move the declaration of that variable into the if statement itself. Cc: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1381444224-3303-1-git-send-email-kys@microsoft.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9f6e9f89d9d..9f7ca266864 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -70,8 +70,6 @@ static struct clocksource hyperv_cs = { static void __init ms_hyperv_init_platform(void) { - u64 hv_lapic_frequency; - /* * Extract the features and hints */ @@ -86,6 +84,8 @@ static void __init ms_hyperv_init_platform(void) /* * Get the APIC frequency. */ + u64 hv_lapic_frequency; + rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); lapic_timer_frequency = hv_lapic_frequency; -- cgit v1.2.3-70-g09d2 From cf30d52e2d11523c42048ab89ed4215b5021526a Mon Sep 17 00:00:00 2001 From: Maria Dimakopoulou Date: Thu, 5 Dec 2013 01:24:37 +0200 Subject: perf/x86: Fix constraint table end marker bug The EVENT_CONSTRAINT_END() macro defines the end marker as a constraint with a weight of zero. This was all fine until we blacklisted the corrupting memory events on Intel IvyBridge. These events are blacklisted by using a counter bitmask of zero. Thus, they also get a constraint weight of zero. The iteration macro: for_each_constraint tests the weight==0. Therefore, it was stopping at the first blacklisted event, i.e., 0xd0. The corrupting events were therefore considered as unconstrained and were scheduled on any of the generic counters. This patch fixes the end marker to have a weight of -1. With this, the blacklisted events get an empty constraint and cannot be scheduled which is what we want for now. Signed-off-by: Maria Dimakopoulou Reviewed-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/20131204232437.GA10689@starlight Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index fd00bb29425..c1a861829d8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,11 +262,20 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) +/* + * We define the end marker as having a weight of -1 + * to enable blacklisting of events using a counter bitmask + * of zero and thus a weight of zero. + * The end marker has a weight that cannot possibly be + * obtained from counting the bits in the bitmask. + */ +#define EVENT_CONSTRAINT_END { .weight = -1 } +/* + * Check for end marker with weight == -1 + */ #define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->weight; (e)++) + for ((e) = (c); (e)->weight != -1; (e)++) /* * Extra registers for specific events. -- cgit v1.2.3-70-g09d2 From 40e2d7f9b5dae048789c64672bf3027fbb663ffa Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 18 Dec 2013 16:44:57 -0500 Subject: x86 idle: Repair large-server 50-watt idle-power regression Linux 3.10 changed the timing of how thread_info->flags is touched: x86: Use generic idle loop (7d1a941731fabf27e5fb6edbebb79fe856edb4e5) This caused Intel NHM-EX and WSM-EX servers to experience a large number of immediate MONITOR/MWAIT break wakeups, which caused cpuidle to demote from deep C-states to shallow C-states, which caused these platforms to experience a significant increase in idle power. Note that this issue was already present before the commit above, however, it wasn't seen often enough to be noticed in power measurements. Here we extend an errata workaround from the Core2 EX "Dunnington" to extend to NHM-EX and WSM-EX, to prevent these immediate returns from MWAIT, reducing idle power on these platforms. While only acpi_idle ran on Dunnington, intel_idle may also run on these two newer systems. As of today, there are no other models that are known to need this tweak. Link: http://lkml.kernel.org/r/CAJvTdK=%2BaNN66mYpCGgbHGCHhYQAKx-vB0kJSWjVpsNb_hOAtQ@mail.gmail.com Signed-off-by: Len Brown Link: http://lkml.kernel.org/r/baff264285f6e585df757d58b17788feabc68918.1387403066.git.len.brown@intel.com Cc: # 3.12.x, 3.11.x, 3.10.x Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 3 ++- drivers/idle/intel_idle.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc1ec0dff93..ea04b342c02 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -387,7 +387,8 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + if (c->x86 == 6 && cpu_has_clflush && + (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); #ifdef CONFIG_X86_64 diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 92d1206482a..f80b700f821 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -377,6 +377,9 @@ static int intel_idle(struct cpuidle_device *dev, if (!current_set_polling_and_test()) { + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) -- cgit v1.2.3-70-g09d2