diff options
Diffstat (limited to 'arch/x86/kernel')
26 files changed, 171 insertions, 170 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1efd3789e3d..06c196d7e59 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -49,8 +49,8 @@ #include <asm/mtrr.h> #include <asm/smp.h> #include <asm/mce.h> -#include <asm/kvm_para.h> #include <asm/tsc.h> +#include <asm/hypervisor.h> unsigned int num_processors; @@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) { + if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; /* Make LAPIC timer preferrable over percpu HPET */ lapic_clockevent.rating = 150; @@ -1476,7 +1476,8 @@ void __init enable_IR_x2apic(void) /* IR is required if there is APIC ID > 255 even when running * under KVM */ - if (max_physical_apicid > 255 || !kvm_para_available()) + if (max_physical_apicid > 255 || + !hypervisor_x2apic_available()) goto nox2apic; /* * without IR all CPUs can be addressed by IOAPIC/MSI diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 72ec29e1ae0..79fd43ca6f9 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -68,7 +68,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: @@ -96,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52735a710c3..697dc34b7b8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2329,7 +2329,7 @@ asmlinkage void 
smp_irq_move_cleanup_interrupt(void) unsigned int irr; struct irq_desc *desc; struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (irq == -1) continue; @@ -2363,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __get_cpu_var(vector_irq)[vector] = -1; + __this_cpu_write(vector_irq[vector], -1); unlock: raw_spin_unlock(&desc->lock); } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f4f9e95aa15..bd16b58b885 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -120,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) else if (!strcmp(oem_table_id, "UVX")) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { - __get_cpu_var(x2apic_extra_bits) = - pnodeid << uvh_apicid.s.pnode_shift; + __this_cpu_write(x2apic_extra_bits, + pnodeid << uvh_apicid.s.pnode_shift); uv_system_type = UV_NON_UNIQUE_APIC; uv_set_apicid_hibit(); return 1; @@ -286,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = x | __get_cpu_var(x2apic_extra_bits); + id = x | __this_cpu_read(x2apic_extra_bits); return id; } @@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = { static __cpuinit void set_x2apic_extra_bits(int pnode) { - __get_cpu_var(x2apic_extra_bits) = (pnode << uvh_apicid.s.pnode_shift); + __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); } /* @@ -641,7 +641,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9e093f8fe78..7c7bedb83c5 100644 --- 
a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383); bool cpu_has_amd_erratum(const int *erratum) { - struct cpuinfo_x86 *cpu = &current_cpu_data; + struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info); int osvw_id = *erratum++; u32 range; u32 ms; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 491977baf6c..35c7e65e59b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc) *rc = -ENODEV; - if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) + if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD) return; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); @@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) static void query_values_on_cpu(void *_err) { int *err = _err; - struct powernow_k8_data *data = __get_cpu_var(powernow_data); + struct powernow_k8_data *data = __this_cpu_read(powernow_data); *err = query_current_values_with_pending_wait(data); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9ecf81f9b90..7283e98deaa 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -265,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, line_size = l2.line_size; lines_per_tag = l2.lines_per_tag; /* cpu_data has errata corrections for K7 applied */ - size_in_kb = current_cpu_data.x86_cache_size; + size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); break; case 3: if (!l3.val) @@ -287,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; if (assoc == 0xffff) diff 
--git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7bfed..a7797197956 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,6 +25,7 @@ #include <linux/gfp.h> #include <asm/mce.h> #include <asm/apic.h> +#include <asm/nmi.h> /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = 1000, + .priority = NMI_LOCAL_NORMAL_PRIOR, }; /* Inject mce on current CPU */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7a35b72d7c0..d916183b7f9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { - unsigned bank = __get_cpu_var(injectm.bank); + unsigned bank = __this_cpu_read(injectm.bank); if (msr == rip_msr) return offsetof(struct mce, ip); @@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr) { u64 v; - if (__get_cpu_var(injectm).finished) { + if (__this_cpu_read(injectm.finished)) { int offset = msr_to_offset(msr); if (offset < 0) @@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr) static void mce_wrmsrl(u32 msr, u64 v) { - if (__get_cpu_var(injectm).finished) { + if (__this_cpu_read(injectm.finished)) { int offset = msr_to_offset(msr); if (offset >= 0) @@ -1159,7 +1159,7 
@@ static void mce_start_timer(unsigned long data) WARN_ON(smp_processor_id() != data); - if (mce_available(&current_cpu_data)) { + if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); } @@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev) static int mce_resume(struct sys_device *dev) { __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(&current_cpu_data); + __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); return 0; } @@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev) static void mce_cpu_restart(void *data) { del_timer_sync(&__get_cpu_var(mce_timer)); - if (!mce_available(&current_cpu_data)) + if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); __mcheck_cpu_init_timer(); @@ -1790,7 +1790,7 @@ static void mce_restart(void) /* Toggle features for corrected errors */ static void mce_disable_ce(void *all) { - if (!mce_available(&current_cpu_data)) + if (!mce_available(__this_cpu_ptr(&cpu_info))) return; if (all) del_timer_sync(&__get_cpu_var(mce_timer)); @@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all) static void mce_enable_ce(void *all) { - if (!mce_available(&current_cpu_data)) + if (!mce_available(__this_cpu_ptr(&cpu_info))) return; cmci_reenable(); cmci_recheck(); @@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(&current_cpu_data)) + if (!mce_available(__this_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) @@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(&current_cpu_data)) + if (!mce_available(__this_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 6fcd0936194..8694ef56459 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ 
b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -130,7 +130,7 @@ void cmci_recheck(void) unsigned long flags; int banks; - if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) + if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) return; local_irq_save(flags); machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a360d14659..9d977a2ea69 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -997,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event) static void x86_pmu_enable_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) + if (__this_cpu_read(cpu_hw_events.enabled)) __x86_pmu_enable_event(&event->hw, ARCH_PERFMON_EVENTSEL_ENABLE); } @@ -1268,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); - if (this_nmi != __get_cpu_var(pmu_nmi).marked) + if (this_nmi != __this_cpu_read(pmu_nmi.marked)) /* let the kernel handle the unknown nmi */ return NOTIFY_DONE; /* @@ -1296,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self, this_nmi = percpu_read(irq_stat.__nmi_count); if ((handled > 1) || /* the next nmi could be a back-to-back nmi */ - ((__get_cpu_var(pmu_nmi).marked == this_nmi) && - (__get_cpu_var(pmu_nmi).handled > 1))) { + ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && + (__this_cpu_read(pmu_nmi.handled) > 1))) { /* * We could have two subsequent back-to-back nmis: The * first handles more than one counter, the 2nd @@ -1308,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self, * handling more than one counter. We will mark the * next (3rd) and then drop it if unhandled. 
*/ - __get_cpu_var(pmu_nmi).marked = this_nmi + 1; - __get_cpu_var(pmu_nmi).handled = handled; + __this_cpu_write(pmu_nmi.marked, this_nmi + 1); + __this_cpu_write(pmu_nmi.handled, handled); } return NOTIFY_STOP; @@ -1318,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static struct event_constraint unconstrained; @@ -1484,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event) */ static void x86_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - perf_pmu_disable(pmu); - cpuc->group_flag |= PERF_EVENT_TXN; - cpuc->n_txn = 0; + __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); + __this_cpu_write(cpu_hw_events.n_txn, 0); } /* @@ -1498,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu) */ static void x86_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - cpuc->group_flag &= ~PERF_EVENT_TXN; + __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* * Truncate the collected events. 
*/ - cpuc->n_added -= cpuc->n_txn; - cpuc->n_events -= cpuc->n_txn; + __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); + __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); perf_pmu_enable(pmu); } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 24e390e40f2..008835c1d79 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { - if (!__get_cpu_var(cpu_hw_events).enabled) + if (!__this_cpu_read(cpu_hw_events.enabled)) return; intel_pmu_enable_bts(hwc->config); @@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event) static void intel_pmu_reset(void) { - struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; + struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); unsigned long flags; int idx; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 81400b93e69..e56b9bfbabd 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -753,19 +753,21 @@ out: static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) { - int overflow = 0; - u32 low, high; + u64 v; - rdmsr(hwc->config_base + hwc->idx, low, high); - - /* we need to check high bit for unflagged overflows */ - if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { - overflow = 1; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, - ((u64)low) & ~P4_CCCR_OVF); + /* an official way for overflow indication */ + rdmsrl(hwc->config_base + hwc->idx, v); + if (v & P4_CCCR_OVF) { + wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + return 1; } - return overflow; + /* it might be unflagged overflow */ + rdmsrl(hwc->event_base + hwc->idx, v); + if (!(v & ARCH_P4_CNTRVAL_MASK)) + return 1; + + 
return 0; } static void p4_pmu_disable_pebs(void) @@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = { */ .num_counters = ARCH_P4_MAX_CCCR, .apic = 1, - .cntval_bits = 40, - .cntval_mask = (1ULL << 40) - 1, - .max_period = (1ULL << 39) - 1, + .cntval_bits = ARCH_P4_CNTRVAL_BITS, + .cntval_mask = ARCH_P4_CNTRVAL_MASK, + .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8474c998cbd..d6fb146c0d8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -197,14 +197,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) */ void dump_stack(void) { - unsigned long bp = 0; unsigned long stack; -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - printk("Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e3ba417e869..d3b895f375d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - movq_cfi rdi, RDI+16-ARGOFFSET - movq_cfi rsi, RSI+16-ARGOFFSET - movq_cfi rdx, RDX+16-ARGOFFSET - movq_cfi rcx, RCX+16-ARGOFFSET - movq_cfi rax, RAX+16-ARGOFFSET - movq_cfi r8, R8+16-ARGOFFSET - movq_cfi r9, R9+16-ARGOFFSET - movq_cfi r10, R10+16-ARGOFFSET - movq_cfi r11, R11+16-ARGOFFSET - - leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + /* + * start from rbp in pt_regs and jump over + * return address. 
+ */ + movq_cfi rdi, RDI+8-RBP + movq_cfi rsi, RSI+8-RBP + movq_cfi rdx, RDX+8-RBP + movq_cfi rcx, RCX+8-RBP + movq_cfi rax, RAX+8-RBP + movq_cfi r8, R8+8-RBP + movq_cfi r9, R9+8-RBP + movq_cfi r10, R10+8-RBP + movq_cfi r11, R11+8-RBP + + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) @@ -782,8 +786,9 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - subq $ORIG_RAX-ARGOFFSET+8, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 + /* reserve pt_regs for scratch regs and rbp */ + subq $ORIG_RAX-RBP, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP call save_args PARTIAL_FRAME 0 call \func @@ -808,9 +813,14 @@ ret_from_intr: TRACE_IRQS_OFF decl PER_CPU_VAR(irq_count) leaveq + CFI_RESTORE rbp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 + + /* we did not save rbx, restore only from ARGOFFSET */ + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 298448656b6..382eb2936d4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -170,9 +170,9 @@ static void ftrace_mod_code(void) void ftrace_nmi_enter(void) { - __get_cpu_var(save_modifying_code) = modifying_code; + __this_cpu_write(save_modifying_code, modifying_code); - if (!__get_cpu_var(save_modifying_code)) + if (!__this_cpu_read(save_modifying_code)) return; if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { @@ -186,7 +186,7 @@ void ftrace_nmi_enter(void) void ftrace_nmi_exit(void) { - if (!__get_cpu_var(save_modifying_code)) + if (!__this_cpu_read(save_modifying_code)) return; /* Finish all executions before clearing nmi_running */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 42c59425450..02f07634d26 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -122,7 +122,7 @@ int 
arch_install_hw_breakpoint(struct perf_event *bp) return -EBUSY; set_debugreg(info->address, i); - __get_cpu_var(cpu_debugreg[i]) = info->address; + __this_cpu_write(cpu_debugreg[i], info->address); dr7 = &__get_cpu_var(cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); @@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) void hw_breakpoint_restore(void) { - set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); - set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); - set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); - set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0); + set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1); + set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2); + set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3); set_debugreg(current->thread.debugreg6, 6); - set_debugreg(__get_cpu_var(cpu_dr7), 7); + set_debugreg(__this_cpu_read(cpu_dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 83ec0175f98..3a43caa3beb 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -234,7 +234,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) exit_idle(); irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (!handle_irq(irq, regs)) { ack_APIC_irq(); @@ -350,12 +350,12 @@ void fixup_irqs(void) for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; - if (__get_cpu_var(vector_irq)[vector] < 0) + if (__this_cpu_read(vector_irq[vector]) < 0) continue; irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); if (irr & (1 << (vector % 32))) { - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); data = irq_get_irq_data(irq); raw_spin_lock(&desc->lock); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 96656f20775..48ff6dcffa0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c 
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) u32 *isp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); - irqctx = __get_cpu_var(hardirq_ctx); + irqctx = __this_cpu_read(hardirq_ctx); /* * this is where we switch to the IRQ stack. However, if we are @@ -166,7 +166,7 @@ asmlinkage void do_softirq(void) if (local_softirq_pending()) { curctx = current_thread_info(); - irqctx = __get_cpu_var(softirq_ctx); + irqctx = __this_cpu_read(softirq_ctx); irqctx->tinfo.task = curctx->task; irqctx->tinfo.previous_esp = current_stack_pointer; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index cd21b654dec..a4130005028 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -48,6 +48,7 @@ #include <asm/apicdef.h> #include <asm/system.h> #include <asm/apic.h> +#include <asm/nmi.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. 
*/ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; @@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = -INT_MAX, + .priority = NMI_LOCAL_LOW_PRIOR, }; /** diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5940282bd2f..d91c477b3f6 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; @@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); if (is_IF_modifier(p->ainsn.insn)) @@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 1; } else if (kprobe_running()) { - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); return 1; @@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { - __get_cpu_var(current_kprobe) = &ri->rp->kp; + __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = 
KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __get_cpu_var(current_kprobe) = NULL; + __this_cpu_write(current_kprobe, NULL); } recycle_rp_inst(ri, &empty_rp); @@ -1202,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; regs->orig_ax = ~0UL; - __get_cpu_var(current_kprobe) = &op->kp; + __this_cpu_write(current_kprobe, &op->kp); kcb->kprobe_status = KPROBE_HIT_ACTIVE; opt_pre_handler(&op->kp, regs); - __get_cpu_var(current_kprobe) = NULL; + __this_cpu_write(current_kprobe, NULL); } preempt_enable_no_resched(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c852041bfc3..09c08a1c706 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -446,7 +446,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); trace_cpu_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { - if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); __monitor((void *)&current_thread_info()->flags, 0, 0); @@ -462,7 +462,7 @@ static void mwait_idle(void) if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id()); - if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); __monitor((void *)&current_thread_info()->flags, 0, 0); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c495aa8d481..fc7aae1e2bc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -18,6 +18,7 @@ #include <asm/pci_x86.h> #include <asm/virtext.h> #include <asm/cpu.h> +#include <asm/nmi.h> #ifdef CONFIG_X86_32 # include <linux/ctype.h> @@ -747,7 +748,7 @@ static int 
crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); @@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5fdc0950da1..763df77343d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -427,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, c->llc_shared_map); - if (current_cpu_data.x86_max_cores == 1) { + if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); c->booted_cores = 1; return; @@ -1089,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) preempt_disable(); smp_cpu_index_default(); - current_cpu_data = boot_cpu_data; + memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); /* @@ -1383,7 +1383,7 @@ void play_dead_common(void) mb(); /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); /* * With physical CPU hotplug, we should halt the cpu @@ -1403,11 +1403,11 @@ static inline void mwait_play_dead(void) int i; void *mwait_ptr; - if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) + if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT)) return; - if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH)) + if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) return; - if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) return; eax = CPUID_MWAIT_LEAF; @@ -1458,7 +1458,7 @@ static inline void mwait_play_dead(void) static inline void hlt_play_dead(void) { - if 
(current_cpu_data.x86 >= 4) + if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); while (1) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c76aaca5694..b9b67166f9d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,6 +84,11 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); static inline void conditional_sti(struct pt_regs *regs) { @@ -310,15 +315,15 @@ static int __init setup_unknown_nmi_panic(char *str) __setup("unknown_nmi_panic", setup_unknown_nmi_panic); static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) +pci_serr_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ #if defined(CONFIG_EDAC) if (edac_handler_set()) { edac_atomic_assert_error(); @@ -329,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); - /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); + /* Clear and disable the PCI SERR error line. 
*/ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -341,15 +346,17 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); show_registers(regs); if (panic_on_io_nmi) panic("NMI IOCK error: Not continuing"); /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); i = 20000; while (--i) { @@ -357,8 +364,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs) udelay(100); } - reason &= ~8; - outb(reason, 0x61); + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); } static notrace __kprobes void @@ -377,57 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) return; } #endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); } static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - int cpu; - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. 
*/ - if (!cpu) - reason = get_nmi_reason(); + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; - if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); -#ifdef CONFIG_X86_LOCAL_APIC - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); #endif - unknown_nmi_error(reason, regs); - + raw_spin_unlock(&nmi_reason_lock); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; + raw_spin_unlock(&nmi_reason_lock); - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif + unknown_nmi_error(reason, regs); } dotraplinkage notrace __kprobes void diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 463901efdba..823f79a17ad 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -659,7 +659,7 @@ void restore_sched_clock_state(void) local_irq_save(flags); - __get_cpu_var(cyc2ns_offset) = 0; + __this_cpu_write(cyc2ns_offset, 0); offset = cyc2ns_suspend - sched_clock(); for_each_possible_cpu(cpu) |