From 1d12d35284b74b7257b84ba0cef1e82a66d73aea Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 1 Aug 2011 11:08:59 -0400 Subject: oprofile, x86: Convert memory allocation to static array On -rt, allocators don't work from atomic context any more, and the maximum size of the array is known at compile time. Call trace on a -rt kernel: oprofile: using NMI interrupt. BUG: sleeping function called from invalid context at kernel/rtmutex.c:645 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: kworker/0:1 Pid: 0, comm: kworker/0:1 Tainted: G C 3.0.0-rt3-patser+ #39 Call Trace: [] __might_sleep+0xca/0xf0 [] rt_spin_lock+0x24/0x40 [] __kmalloc+0xc7/0x370 [] ? ppro_setup_ctrs+0x215/0x260 [oprofile] [] ? oprofile_cpu_notifier+0x60/0x60 [oprofile] [] ppro_setup_ctrs+0x215/0x260 [oprofile] [] ? oprofile_cpu_notifier+0x60/0x60 [oprofile] [] ? oprofile_cpu_notifier+0x60/0x60 [oprofile] [] nmi_cpu_setup+0xc4/0x110 [oprofile] [] generic_smp_call_function_interrupt+0x95/0x190 [] smp_call_function_interrupt+0x27/0x40 [] call_function_interrupt+0x13/0x20 [] ? plist_check_head+0x54/0xc0 [] ? intel_idle+0xc8/0x120 [] ? intel_idle+0xa7/0x120 [] cpuidle_idle_call+0xb0/0x230 [] cpu_idle+0x8b/0xe0 [] start_secondary+0x1d3/0x1d8 Signed-off-by: Maarten Lankhorst Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'arch/x86/oprofile') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 94b745045e4..608874b70cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -28,7 +28,7 @@ static int counter_width = 32; #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) -static u64 *reset_value; +static u64 reset_value[OP_MAX_COUNTER]; static void ppro_shutdown(struct op_msrs const * const msrs) { @@ -40,10 +40,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } - if (reset_value) { - kfree(reset_value); - reset_value = NULL; - } } static int ppro_fill_in_addresses(struct op_msrs * const msrs) @@ -79,13 +75,6 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; - if (!reset_value) { - reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, - GFP_ATOMIC); - if (!reset_value) - return; - } - if (cpu_has_arch_perfmon) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); @@ -141,13 +130,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, u64 val; int i; - /* - * This can happen if perf counters are in use when - * we steal the die notifier NMI. - */ - if (unlikely(!reset_value)) - goto out; - for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; @@ -179,8 +161,6 @@ static void ppro_start(struct op_msrs const * const msrs) u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { rdmsrl(msrs->controls[i].addr, val); @@ -196,8 +176,6 @@ static void ppro_stop(struct op_msrs const * const msrs) u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; -- cgit v1.2.3-70-g09d2 From 298557db42eb2d6efca81669dc369425b46c5be6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Aug 2011 23:39:53 +0200 Subject: oprofile, x86: Fix overflow and warning (commit 1d12d35) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following fixes for: 1d12d35 oprofile, x86: Convert memory allocation to static array Fix potential buffer overflow. Fix the following warning: arch/x86/oprofile/op_model_ppro.c: In function ‘ppro_check_ctrs’: arch/x86/oprofile/op_model_ppro.c:143: warning: label ‘out’ defined but not used Cc: Maarten Lankhorst Cc: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_ppro.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/oprofile') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 608874b70cf..d90528ea541 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -140,7 +140,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, wrmsrl(msrs->counters[i].addr, -reset_value[i]); } -out: /* Only P6 based Pentium M need to re-unmask the apic vector but it * doesn't hurt other P6 variant */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); @@ -220,7 +219,7 @@ static void arch_perfmon_setup_counters(void) eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; -- cgit v1.2.3-70-g09d2 From 9c48f1c629ecfa114850c03f875c6691003214de Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 30 Sep 2011 15:06:21 -0400 Subject: x86, nmi: Wire up NMI handlers to new routines Just convert all the files that have an nmi handler to the new routines. Most of it is straight forward conversion. A couple of places needed some tweaking like kgdb which separates the debug notifier from the nmi handler and mce removes a call to notify_die. [Thanks to Ying for finding out the history behind that mce call https://lkml.org/lkml/2010/5/27/114 And Boris responding that he would like to remove that call because of it https://lkml.org/lkml/2011/9/21/163] The things that get converted are the registeration/unregistration routines and the nmi handler itself has its args changed along with code removal to check which list it is on (most are on one NMI list except for kgdb which has both an NMI routine and an NMI Unknown routine). Signed-off-by: Don Zickus Signed-off-by: Peter Zijlstra Acked-by: Corey Minyard Cc: Jason Wessel Cc: Andi Kleen Cc: Robert Richter Cc: Huang Ying Cc: Corey Minyard Cc: Jack Steiner Link: http://lkml.kernel.org/r/1317409584-23662-4-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 20 ---------- arch/x86/include/asm/reboot.h | 2 +- arch/x86/kernel/apic/hw_nmi.c | 27 +++---------- arch/x86/kernel/apic/x2apic_uv_x.c | 20 ++-------- arch/x86/kernel/cpu/mcheck/mce-inject.c | 20 ++++------ arch/x86/kernel/cpu/mcheck/mce.c | 3 -- arch/x86/kernel/cpu/perf_event.c | 69 ++------------------------------- arch/x86/kernel/crash.c | 5 +-- arch/x86/kernel/kgdb.c | 60 +++++++++++++++++++++------- arch/x86/kernel/nmi.c | 11 ++++-- arch/x86/kernel/reboot.c | 23 ++++------- arch/x86/oprofile/nmi_int.c | 38 ++++++------------ arch/x86/oprofile/nmi_timer_int.c | 28 +++---------- drivers/acpi/apei/ghes.c | 22 ++++------- drivers/char/ipmi/ipmi_watchdog.c | 33 ++++++---------- drivers/watchdog/hpwdt.c | 24 +++--------- 16 files changed, 124 insertions(+), 281 deletions(-) (limited to 'arch/x86/oprofile') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 480b69b7575..53610957fea 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -22,26 +22,6 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -/* - * Define some priorities for the nmi notifier call chain. - * - * Create a local nmi bit that has a higher priority than - * external nmis, because the local ones are more frequent. - * - * Also setup some default high/normal/low settings for - * subsystems to registers with. Using 4 bits to separate - * the priorities. This can go a lot higher if needed be. - */ - -#define NMI_LOCAL_SHIFT 16 /* randomly picked */ -#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) -#define NMI_HIGH_PRIOR (1ULL << 8) -#define NMI_NORMAL_PRIOR (1ULL << 4) -#define NMI_LOW_PRIOR (1ULL << 0) -#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) -#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) -#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) - #define NMI_FLAG_FIRST 1 enum { diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 3250e3d605d..92f297069e8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -typedef void (*nmi_shootdown_cb)(int, struct die_args*); +typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); #endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index d5e57db0f7b..31cb9ae992b 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void) } static int __kprobes -arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - struct pt_regs *regs; int cpu; - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { @@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, show_regs(regs); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + return NMI_DONE; } -static __read_mostly struct notifier_block backtrace_notifier = { - .notifier_call = arch_trigger_all_cpu_backtrace_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static int __init register_trigger_all_cpu_backtrace(void) { - register_die_notifier(&backtrace_notifier); + register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, + 0, "arch_bt"); return 0; } early_initcall(register_trigger_all_cpu_backtrace); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 34b18594e72..75be00ecfff 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -672,18 +672,11 @@ void __cpuinit uv_cpu_init(void) /* * When NMI is received, print a stack trace. */ -int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) +int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { unsigned long real_uv_nmi; int bid; - if (reason != DIE_NMIUNKNOWN) - return NOTIFY_OK; - - if (in_crash_kexec) - /* do nothing if entering the crash kernel */ - return NOTIFY_OK; - /* * Each blade has an MMR that indicates when an NMI has been sent * to cpus on the blade. If an NMI is detected, atomically @@ -704,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NOTIFY_DONE; + return NMI_DONE; __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; @@ -717,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) dump_stack(); spin_unlock(&uv_nmi_lock); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi, - .priority = NMI_LOCAL_LOW_PRIOR - 1, -}; - void uv_register_nmi_notifier(void) { - if (register_die_notifier(&uv_dump_stack_nmi_nb)) + if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) printk(KERN_WARNING "UV NMI handler failed to register\n"); } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 0ed633c5048..6199232161c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(struct notifier_block *self, - unsigned long val, void *data) +static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NOTIFY_DONE; + if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, args->regs); + raise_exception(m, regs); else if (m->status) raise_poll(m); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block mce_raise_nb = { - .notifier_call = mce_raise_notify, - .priority = NMI_LOCAL_NORMAL_PRIOR, -}; - /* Inject mce on current CPU */ static int raise_local(void) { @@ -216,7 +209,8 @@ static int inject_init(void) return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; - register_die_notifier(&mce_raise_nb); + register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, + "mce_notify"); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5b5cceec94c..fce51ad1f36 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; if (!banks) goto out; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8ab89112f93..640891014b2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1058,76 +1058,15 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -struct pmu_nmi_state { - unsigned int marked; - int handled; -}; - -static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); - static int __kprobes -perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - unsigned int this_nmi; - int handled; - if (!atomic_read(&active_events)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - case DIE_NMIUNKNOWN: - this_nmi = percpu_read(irq_stat.__nmi_count); - if (this_nmi != __this_cpu_read(pmu_nmi.marked)) - /* let the kernel handle the unknown nmi */ - return NOTIFY_DONE; - /* - * This one is a PMU back-to-back nmi. Two events - * trigger 'simultaneously' raising two back-to-back - * NMIs. If the first NMI handles both, the latter - * will be empty and daze the CPU. So, we drop it to - * avoid false-positive 'unknown nmi' messages. - */ - return NOTIFY_STOP; - default: - return NOTIFY_DONE; - } - - handled = x86_pmu.handle_irq(args->regs); - if (!handled) - return NOTIFY_DONE; + return NMI_DONE; - this_nmi = percpu_read(irq_stat.__nmi_count); - if ((handled > 1) || - /* the next nmi could be a back-to-back nmi */ - ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && - (__this_cpu_read(pmu_nmi.handled) > 1))) { - /* - * We could have two subsequent back-to-back nmis: The - * first handles more than one counter, the 2nd - * handles only one counter and the 3rd handles no - * counter. - * - * This is the 2nd nmi because the previous was - * handling more than one counter. We will mark the - * next (3rd) and then drop it if unhandled. - */ - __this_cpu_write(pmu_nmi.marked, this_nmi + 1); - __this_cpu_write(pmu_nmi.handled, handled); - } - - return NOTIFY_STOP; + return x86_pmu.handle_irq(regs); } -static __read_mostly struct notifier_block perf_event_nmi_notifier = { - .notifier_call = perf_event_nmi_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1232,7 +1171,7 @@ static int __init init_hw_perf_events(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); - register_die_notifier(&perf_event_nmi_notifier); + register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 764c7c2b181..13ad89971d4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -32,15 +32,12 @@ int in_crash_kexec; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static void kdump_nmi_callback(int cpu, struct die_args *args) +static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { - struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif - regs = args->regs; - #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00354d4919a..faba5771aca 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) static int was_in_debug_nmi[NR_CPUS]; -static int __kgdb_notify(struct die_args *args, unsigned long cmd) +static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct pt_regs *regs = args->regs; - switch (cmd) { - case DIE_NMI: + case NMI_LOCAL: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); was_in_debug_nmi[raw_smp_processor_id()] = 1; touch_nmi_watchdog(); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; - case DIE_NMIUNKNOWN: + case NMI_UNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; + default: + /* do nothing */ + break; + } + return NMI_DONE; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + switch (cmd) { case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) @@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_notify, - - /* - * Lowest-prio notifier priority, we want to be notified last: - */ - .priority = NMI_LOCAL_LOW_PRIOR, }; /** @@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int retval; + + retval = register_die_notifier(&kgdb_notifier); + if (retval) + goto out; + + retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, + 0, "kgdb"); + if (retval) + goto out1; + + retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, + 0, "kgdb"); + + if (retval) + goto out2; + + return retval; + +out2: + unregister_nmi_handler(NMI_LOCAL, "kgdb"); +out1: + unregister_die_notifier(&kgdb_notifier); +out: + return retval; } static void kgdb_hw_overflow_handler(struct perf_event *event, @@ -673,6 +701,8 @@ void kgdb_arch_exit(void) breakinfo[i].pev = NULL; } } + unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); + unregister_nmi_handler(NMI_LOCAL, "kgdb"); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 327748d4f6b..e20f5e79059 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -1,6 +1,7 @@ /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + * Copyright (C) 2011 Don Zickus Red Hat, Inc. * * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 @@ -248,8 +249,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == - NOTIFY_STOP) + int handled; + + handled = nmi_handle(NMI_UNKNOWN, regs); + if (handled) return; #ifdef CONFIG_MCA /* @@ -274,13 +277,15 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int handled; /* * CPU-specific NMI must be processed before non-CPU-specific * NMI, otherwise we may lose it, because the CPU-specific * NMI can not be detected/processed on other CPUs. */ - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + handled = nmi_handle(NMI_LOCAL, regs); + if (handled) return; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9242436e993..e334be1182b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -464,7 +464,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct die_args *args) +static void vmxoff_nmi(int cpu, struct pt_regs *regs) { cpu_emergency_vmxoff(); } @@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { int cpu; - if (val != DIE_NMI) - return NOTIFY_OK; - cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. @@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self, * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return NOTIFY_STOP; + return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, (struct die_args *)data); + shootdown_callback(cpu, regs); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self, for (;;) cpu_relax(); - return 1; + return NMI_HANDLED; } static void smp_send_nmi_allbutself(void) @@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, - /* we want to be the first one called */ - .priority = NMI_LOCAL_HIGH_PRIOR+1, -}; - /* Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash @@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) + if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, + NMI_FLAG_FIRST, "crash")) return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fdc034..adf8fb31aa7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -61,26 +61,15 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, } -static int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - if (ctr_running) - model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); - else if (!nmi_enabled) - break; - else - model->stop(&__get_cpu_var(cpu_msrs)); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + if (ctr_running) + model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + return NMI_DONE; + else + model->stop(&__get_cpu_var(cpu_msrs)); + return NMI_HANDLED; } static void nmi_cpu_save_registers(struct op_msrs *msrs) @@ -363,12 +352,6 @@ static void nmi_cpu_setup(void *dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static struct notifier_block profile_exceptions_nb = { - .notifier_call = profile_exceptions_notify, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -508,7 +491,8 @@ static int nmi_setup(void) ctr_running = 0; /* make variables visible to the nmi handler: */ smp_mb(); - err = register_die_notifier(&profile_exceptions_nb); + err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, + 0, "oprofile"); if (err) goto fail; @@ -538,7 +522,7 @@ static void nmi_shutdown(void) put_online_cpus(); /* make variables visible to the nmi handler: */ smp_mb(); - unregister_die_notifier(&profile_exceptions_nb); + unregister_nmi_handler(NMI_LOCAL, "oprofile"); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index 720bf5a53c5..7f8052cd662 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -18,32 +18,16 @@ #include #include -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + oprofile_add_sample(regs, 0); + return NMI_HANDLED; } -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = NMI_LOW_PRIOR, -}; - static int timer_start(void) { - if (register_die_notifier(&profile_timer_exceptions_nb)) + if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, + 0, "oprofile-timer")) return 1; return 0; } @@ -51,7 +35,7 @@ static int timer_start(void) static void timer_stop(void) { - unregister_die_notifier(&profile_timer_exceptions_nb); + unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); synchronize_sched(); /* Allow already-started NMIs to complete. */ } diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 0784f99a466..b8e08cb67a1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "apei-internal.h" @@ -749,15 +750,11 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) } } -static int ghes_notify_nmi(struct notifier_block *this, - unsigned long cmd, void *data) +static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes, *ghes_global = NULL; int sev, sev_global = -1; - int ret = NOTIFY_DONE; - - if (cmd != DIE_NMI) - return ret; + int ret = NMI_DONE; raw_spin_lock(&ghes_nmi_lock); list_for_each_entry_rcu(ghes, &ghes_nmi, list) { @@ -770,10 +767,10 @@ static int ghes_notify_nmi(struct notifier_block *this, sev_global = sev; ghes_global = ghes; } - ret = NOTIFY_STOP; + ret = NMI_HANDLED; } - if (ret == NOTIFY_DONE) + if (ret == NMI_DONE) goto out; if (sev_global >= GHES_SEV_PANIC) { @@ -825,10 +822,6 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; -static struct notifier_block ghes_notifier_nmi = { - .notifier_call = ghes_notify_nmi, -}; - static unsigned long ghes_esource_prealloc_size( const struct acpi_hest_generic *generic) { @@ -918,7 +911,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) - register_die_notifier(&ghes_notifier_nmi); + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, + "ghes"); list_add_rcu(&ghes->list, &ghes_nmi); mutex_unlock(&ghes_list_mutex); break; @@ -964,7 +958,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) - unregister_die_notifier(&ghes_notifier_nmi); + unregister_nmi_handler(NMI_LOCAL, "ghes"); mutex_unlock(&ghes_list_mutex); /* * To synchronize with NMI handler, ghes can only be diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 3302586655c..c2917ffad2c 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -65,6 +65,7 @@ * mechanism for it at that time. */ #include +#include #define HAVE_DIE_NMI #endif @@ -1077,17 +1078,8 @@ static void ipmi_unregister_watchdog(int ipmi_intf) #ifdef HAVE_DIE_NMI static int -ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) +ipmi_nmi(unsigned int val, struct pt_regs *regs) { - struct die_args *args = data; - - if (val != DIE_NMIUNKNOWN) - return NOTIFY_OK; - - /* Hack, if it's a memory or I/O error, ignore it. */ - if (args->err & 0xc0) - return NOTIFY_OK; - /* * If we get here, it's an NMI that's not a memory or I/O * error. We can't truly tell if it's from IPMI or not @@ -1097,15 +1089,15 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) if (testing_nmi) { testing_nmi = 2; - return NOTIFY_STOP; + return NMI_HANDLED; } /* If we are not expecting a timeout, ignore it. */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) - return NOTIFY_OK; + return NMI_DONE; if (preaction_val != WDOG_PRETIMEOUT_NMI) - return NOTIFY_OK; + return NMI_DONE; /* * If no one else handled the NMI, we assume it was the IPMI @@ -1120,12 +1112,8 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) panic(PFX "pre-timeout"); } - return NOTIFY_STOP; + return NMI_HANDLED; } - -static struct notifier_block ipmi_nmi_handler = { - .notifier_call = ipmi_nmi -}; #endif static int wdog_reboot_handler(struct notifier_block *this, @@ -1290,7 +1278,8 @@ static void check_parms(void) } } if (do_nmi && !nmi_handler_registered) { - rv = register_die_notifier(&ipmi_nmi_handler); + rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0, + "ipmi"); if (rv) { printk(KERN_WARNING PFX "Can't register nmi handler\n"); @@ -1298,7 +1287,7 @@ static void check_parms(void) } else nmi_handler_registered = 1; } else if (!do_nmi && nmi_handler_registered) { - unregister_die_notifier(&ipmi_nmi_handler); + unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); nmi_handler_registered = 0; } #endif @@ -1336,7 +1325,7 @@ static int __init ipmi_wdog_init(void) if (rv) { #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_die_notifier(&ipmi_nmi_handler); + unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); #endif atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier); @@ -1357,7 +1346,7 @@ static void __exit ipmi_wdog_exit(void) #ifdef HAVE_DIE_NMI if (nmi_handler_registered) - unregister_die_notifier(&ipmi_nmi_handler); + unregister_nmi_handler(NMI_UNKNOWN, "ipmi"); #endif atomic_notifier_chain_unregister(&panic_notifier_list, diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 809cbda03d7..7e7feac0522 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -477,15 +477,11 @@ static int hpwdt_time_left(void) /* * NMI Handler */ -static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, - void *data) +static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) { unsigned long rom_pl; static int die_nmi_called; - if (ulReason != DIE_NMIUNKNOWN) - goto out; - if (!hpwdt_nmi_decoding) goto out; @@ -508,7 +504,7 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, "Management Log for details.\n"); out: - return NOTIFY_OK; + return NMI_DONE; } #endif /* CONFIG_HPWDT_NMI_DECODING */ @@ -648,13 +644,6 @@ static struct miscdevice hpwdt_miscdev = { .fops = &hpwdt_fops, }; -#ifdef CONFIG_HPWDT_NMI_DECODING -static struct notifier_block die_notifier = { - .notifier_call = hpwdt_pretimeout, - .priority = 0, -}; -#endif /* CONFIG_HPWDT_NMI_DECODING */ - /* * Init & Exit */ @@ -740,10 +729,9 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) * die notify list to handle a critical NMI. The default is to * be last so other users of the NMI signal can function. */ - if (priority) - die_notifier.priority = 0x7FFFFFFF; - - retval = register_die_notifier(&die_notifier); + retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, + (priority) ? NMI_FLAG_FIRST : 0, + "hpwdt"); if (retval != 0) { dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", @@ -763,7 +751,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) static void hpwdt_exit_nmi_decoding(void) { - unregister_die_notifier(&die_notifier); + unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); if (cru_rom_addr) iounmap(cru_rom_addr); } -- cgit v1.2.3-70-g09d2 From ee5789dbcc800ba7d641443e53f60d53977f9747 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Sep 2011 11:30:17 +0200 Subject: perf, x86: Share IBS macros between perf and oprofile Moving IBS macros from oprofile to to make it available to perf. No additional changes. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1316597423-25723-2-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 38 +++++++++++++++++++++++++++++++++--- arch/x86/kernel/cpu/perf_event_amd.c | 4 ++-- arch/x86/oprofile/op_model_amd.c | 37 +++-------------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch/x86/oprofile') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index e47cb6167e8..e9e277c6991 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,14 +46,17 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_NUM_COUNTERS 4 +#define AMD64_NUM_COUNTERS_F15H 6 +#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 /* * Intel "Architectural Performance Monitoring" CPUID @@ -113,6 +116,35 @@ union cpuid10_edx { */ #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) +#define IBS_CAPS_RDWROPCNT (1U<<3) +#define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) + +/* + * IBS APIC setup + */ +#define IBSCTL 0x1cc +#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) +#define IBSCTL_LVT_OFFSET_MASK 0x0F + /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db8e603ff0c..aeefd45697a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -411,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = { .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, + .num_counters = AMD64_NUM_COUNTERS, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, @@ -575,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .perfctr = MSR_F15H_PERF_CTR, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 6, + .num_counters = AMD64_NUM_COUNTERS_F15H, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9cbb710dc94..e947e5cb2e6 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -29,8 +29,6 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_COUNTERS_F15H 6 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define NUM_VIRT_COUNTERS 32 #else @@ -69,35 +67,6 @@ struct ibs_state { static struct ibs_config ibs_config; static struct ibs_state ibs_state; -/* - * IBS cpuid feature detection - */ - -#define IBS_CPUID_FEATURES 0x8000001b - -/* - * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but - * bit 0 is used to indicate the existence of IBS. - */ -#define IBS_CAPS_AVAIL (1U<<0) -#define IBS_CAPS_FETCHSAM (1U<<1) -#define IBS_CAPS_OPSAM (1U<<2) -#define IBS_CAPS_RDWROPCNT (1U<<3) -#define IBS_CAPS_OPCNT (1U<<4) -#define IBS_CAPS_BRNTRGT (1U<<5) -#define IBS_CAPS_OPCNTEXT (1U<<6) - -#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ - | IBS_CAPS_FETCHSAM \ - | IBS_CAPS_OPSAM) - -/* - * IBS APIC setup - */ -#define IBSCTL 0x1cc -#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) -#define IBSCTL_LVT_OFFSET_MASK 0x0F - /* * IBS randomization macros */ @@ -439,7 +408,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == NUM_COUNTERS_F15H) { + if (num_counters == AMD64_NUM_COUNTERS_F15H) { msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); } else { @@ -741,9 +710,9 @@ static int op_amd_init(struct oprofile_operations *ops) ops->create_files = setup_ibs_files; if (boot_cpu_data.x86 == 0x15) { - num_counters = NUM_COUNTERS_F15H; + num_counters = AMD64_NUM_COUNTERS_F15H; } else { - num_counters = NUM_COUNTERS; + num_counters = AMD64_NUM_COUNTERS; } op_amd_spec.num_counters = num_counters; -- cgit v1.2.3-70-g09d2 From b716916679e72054d436afadce2f94dcad71cfad Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Sep 2011 11:30:18 +0200 Subject: perf, x86: Implement IBS initialization This patch implements IBS feature detection and initialzation. The code is shared between perf and oprofile. If IBS is available on the system for perf, a pmu is setup. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1316597423-25723-3-git-send-email-robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 294 +++++++++++++++++++++++++++++++ arch/x86/oprofile/nmi_int.c | 2 - arch/x86/oprofile/op_model_amd.c | 197 --------------------- arch/x86/oprofile/op_x86_model.h | 1 - 6 files changed, 297 insertions(+), 201 deletions(-) create mode 100644 arch/x86/kernel/cpu/perf_event_amd_ibs.c (limited to 'arch/x86/oprofile') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index e9e277c6991..f61c62f7d5d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -159,6 +159,8 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +extern u32 get_ibs_caps(void); + #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1044fd787db..fe6eb197f84 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ endif obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c new file mode 100644 index 00000000000..ab6343d2182 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -0,0 +1,294 @@ +/* + * Performance events - AMD IBS + * + * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include + +#include + +static u32 ibs_caps; + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) + +static struct pmu perf_ibs; + +static int perf_ibs_init(struct perf_event *event) +{ + if (perf_ibs.type != event->attr.type) + return -ENOENT; + return 0; +} + +static int perf_ibs_add(struct perf_event *event, int flags) +{ + return 0; +} + +static void perf_ibs_del(struct perf_event *event, int flags) +{ +} + +static struct pmu perf_ibs = { + .event_init= perf_ibs_init, + .add= perf_ibs_add, + .del= perf_ibs_del, +}; + +static __init int perf_event_ibs_init(void) +{ + if (!ibs_caps) + return -ENODEV; /* ibs not supported by the cpu */ + + perf_pmu_register(&perf_ibs, "ibs", -1); + printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); + + return 0; +} + +#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ + +static __init int perf_event_ibs_init(void) { return 0; } + +#endif + +/* IBS - apic initialization, for perf and oprofile */ + +static __init u32 __get_ibs_caps(void) +{ + u32 caps; + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_DEFAULT; + + caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_DEFAULT; + + return caps; +} + +u32 get_ibs_caps(void) +{ + return ibs_caps; +} + +EXPORT_SYMBOL(get_ibs_caps); + +static inline int get_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); +} + +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + +/* + * Check and reserve APIC extended interrupt LVT offset for IBS if available. + */ +static inline int ibs_eilvt_valid(void) +{ + int offset; + u64 val; + int valid = 0; + + preempt_disable(); + + rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + if (!get_eilvt(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + valid = 1; +out: + preempt_enable(); + + return valid; +} + +static int setup_ibs_ctl(int ibs_eilvt_off) +{ + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVT_OFFSET_VALID); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { + pci_dev_put(cpu_cfg); + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x\n", value); + return -EINVAL; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG "No CPU node configured for IBS\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that + * is using the new offset. + */ +static int force_ibs_eilvt_setup(void) +{ + int offset; + int ret; + + preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; + } + preempt_enable(); + + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } + + ret = setup_ibs_ctl(offset); + if (ret) + goto out; + + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); + pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); + + return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; +} + +static inline int get_ibs_lvt_offset(void) +{ + u64 val; + + rdmsrl(MSR_AMD64_IBSCTL, val); + if (!(val & IBSCTL_LVT_OFFSET_VALID)) + return -EINVAL; + + return val & IBSCTL_LVT_OFFSET_MASK; +} + +static void setup_APIC_ibs(void *dummy) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset < 0) + goto failed; + + if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) + return; +failed: + pr_warn("perf: IBS APIC setup failed on cpu #%d\n", + smp_processor_id()); +} + +static void clear_APIC_ibs(void *dummy) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset >= 0) + setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); +} + +static int __cpuinit +perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + setup_APIC_ibs(NULL); + break; + case CPU_DYING: + clear_APIC_ibs(NULL); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static __init int amd_ibs_init(void) +{ + u32 caps; + int ret; + + caps = __get_ibs_caps(); + if (!caps) + return -ENODEV; /* ibs not supported by the cpu */ + + if (!ibs_eilvt_valid()) { + ret = force_ibs_eilvt_setup(); + if (ret) { + pr_err("Failed to setup IBS, %d\n", ret); + return ret; + } + } + + get_online_cpus(); + ibs_caps = caps; + /* make ibs_caps visible to other cpus: */ + smp_mb(); + perf_cpu_notifier(perf_ibs_cpu_notifier); + smp_call_function(setup_APIC_ibs, NULL, 1); + put_online_cpus(); + + return perf_event_ibs_init(); +} + +/* Since we need the pci subsystem to init ibs we can't do this earlier: */ +device_initcall(amd_ibs_init); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index adf8fb31aa7..c04dc145a4b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -385,8 +385,6 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); - if (model->cpu_down) - model->cpu_down(); } static void nmi_cpu_up(void *dummy) diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index e947e5cb2e6..303f0863782 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -74,27 +74,6 @@ static struct ibs_state ibs_state; #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) -static u32 get_ibs_caps(void) -{ - u32 ibs_caps; - unsigned int max_level; - - if (!boot_cpu_has(X86_FEATURE_IBS)) - return 0; - - /* check IBS cpuid feature flags */ - max_level = cpuid_eax(0x80000000); - if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_DEFAULT; - - ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); - if (!(ibs_caps & IBS_CAPS_AVAIL)) - /* cpuid flags not valid */ - return IBS_CAPS_DEFAULT; - - return ibs_caps; -} - /* * 16-bit Linear Feedback Shift Register (LFSR) * @@ -285,81 +264,6 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static inline int get_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); -} - -static inline int put_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, 0, 1); -} - -static inline int ibs_eilvt_valid(void) -{ - int offset; - u64 val; - int valid = 0; - - preempt_disable(); - - rdmsrl(MSR_AMD64_IBSCTL, val); - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - if (!get_eilvt(offset)) { - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - valid = 1; -out: - preempt_enable(); - - return valid; -} - -static inline int get_ibs_offset(void) -{ - u64 val; - - rdmsrl(MSR_AMD64_IBSCTL, val); - if (!(val & IBSCTL_LVT_OFFSET_VALID)) - return -EINVAL; - - return val & IBSCTL_LVT_OFFSET_MASK; -} - -static void setup_APIC_ibs(void) -{ - int offset; - - offset = get_ibs_offset(); - if (offset < 0) - goto failed; - - if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) - return; -failed: - pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n", - smp_processor_id()); -} - -static void clear_APIC_ibs(void) -{ - int offset; - - offset = get_ibs_offset(); - if (offset >= 0) - setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); -} - #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, @@ -473,15 +377,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, val |= op_x86_get_ctrl(model, &counter_config[virt]); wrmsrl(msrs->controls[i].addr, val); } - - if (ibs_caps) - setup_APIC_ibs(); -} - -static void op_amd_cpu_shutdown(void) -{ - if (ibs_caps) - clear_APIC_ibs(); } static int op_amd_check_ctrs(struct pt_regs * const regs, @@ -544,86 +439,6 @@ static void op_amd_stop(struct op_msrs const * const msrs) op_amd_stop_ibs(); } -static int setup_ibs_ctl(int ibs_eilvt_off) -{ - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVT_OFFSET_VALID); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); - return -EINVAL; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); - return -ENODEV; - } - - return 0; -} - -/* - * This runs only on the current cpu. We try to find an LVT offset and - * setup the local APIC. For this we must disable preemption. On - * success we initialize all nodes with this offset. This updates then - * the offset in the IBS_CTL per-node msr. The per-core APIC setup of - * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- - * amd_cpu_shutdown() using the new offset. - */ -static int force_ibs_eilvt_setup(void) -{ - int offset; - int ret; - - preempt_disable(); - /* find the next free available EILVT entry, skip offset 0 */ - for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { - if (get_eilvt(offset)) - break; - } - preempt_enable(); - - if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; - } - - ret = setup_ibs_ctl(offset); - if (ret) - goto out; - - if (!ibs_eilvt_valid()) { - ret = -EFAULT; - goto out; - } - - pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); - pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); - - return 0; -out: - preempt_disable(); - put_eilvt(offset); - preempt_enable(); - return ret; -} - /* * check and reserve APIC extended interrupt LVT offset for IBS if * available @@ -636,17 +451,6 @@ static void init_ibs(void) if (!ibs_caps) return; - if (ibs_eilvt_valid()) - goto out; - - if (!force_ibs_eilvt_setup()) - goto out; - - /* Failed to setup ibs */ - ibs_caps = 0; - return; - -out: printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } @@ -729,7 +533,6 @@ struct op_x86_model_spec op_amd_spec = { .init = op_amd_init, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, - .cpu_down = &op_amd_cpu_shutdown, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 89017fa1fd6..71e8a67337e 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -43,7 +43,6 @@ struct op_x86_model_spec { int (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); - void (*cpu_down)(void); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); -- cgit v1.2.3-70-g09d2