From bde92cf455a03a91badb7046855592d8c008e929 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 23 Jun 2014 13:22:03 -0700 Subject: kernel/watchdog.c: remove preemption restrictions when restarting lockup detector Peter Wu noticed the following splat on his machine when updating /proc/sys/kernel/watchdog_thresh: BUG: sleeping function called from invalid context at mm/slub.c:965 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init 3 locks held by init/1: #0: (sb_writers#3){.+.+.+}, at: [] vfs_write+0x143/0x180 #1: (watchdog_proc_mutex){+.+.+.}, at: [] proc_dowatchdog+0x33/0x110 #2: (cpu_hotplug.lock){.+.+.+}, at: [] get_online_cpus+0x32/0x80 Preemption disabled at:[] proc_dowatchdog+0xe4/0x110 CPU: 0 PID: 1 Comm: init Not tainted 3.16.0-rc1-testing #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x4e/0x7a __might_sleep+0x11d/0x190 kmem_cache_alloc_trace+0x4e/0x1e0 perf_event_alloc+0x55/0x440 perf_event_create_kernel_counter+0x26/0xe0 watchdog_nmi_enable+0x75/0x140 update_timers_all_cpus+0x53/0xa0 proc_dowatchdog+0xe4/0x110 proc_sys_call_handler+0xb3/0xc0 proc_sys_write+0x14/0x20 vfs_write+0xad/0x180 SyS_write+0x49/0xb0 system_call_fastpath+0x16/0x1b NMI watchdog: disabled (cpu0): hardware events not enabled What happened is after updating the watchdog_thresh, the lockup detector is restarted to utilize the new value. Part of this process involved disabling preemption. Once preemption was disabled, perf tried to allocate a new event (as part of the restart). This caused the above BUG_ON as you can't sleep with preemption disabled. The preemption restriction seemed aggressive as we are not doing anything on that particular cpu, but with all the online cpus (which are protected by the get_online_cpus lock). Remove the restriction and the BUG_ON goes away. 
Signed-off-by: Don Zickus Acked-by: Michal Hocko Reported-by: Peter Wu Tested-by: Peter Wu Acked-by: David Rientjes Cc: [3.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/watchdog.c') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 516203e665f..30e482240da 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -527,10 +527,8 @@ static void update_timers_all_cpus(void) int cpu; get_online_cpus(); - preempt_disable(); for_each_online_cpu(cpu) update_timers(cpu); - preempt_enable(); put_online_cpus(); } -- cgit v1.2.3-70-g09d2 From ed235875e2ca983197831337a986f0517074e1a0 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 23 Jun 2014 13:22:05 -0700 Subject: kernel/watchdog.c: print traces for all cpus on lockup detection A 'softlockup' is defined as a bug that causes the kernel to loop in kernel mode for more than a predefined period of time, without giving other tasks a chance to run. Currently, upon detection of this condition by the per-cpu watchdog task, debug information (including a stack trace) is sent to the system log. On some occasions, we have observed that the "victim" rather than the actual "culprit" (i.e. the owner/holder of the contended resource) is reported to the user. Often this information has proven to be insufficient to assist debugging efforts. To avoid loss of useful debug information, for architectures which support NMI, this patch makes it possible to improve soft lockup reporting. This is accomplished by issuing an NMI to each cpu to obtain a stack trace. If NMI is not supported we just revert back to the old method. A sysctl and a boot-time parameter are available to toggle this feature. [dzickus@redhat.com: add CONFIG_SMP in certain areas] [akpm@linux-foundation.org: additional CONFIG_SMP=n optimisations] [mq@suse.cz: fix warning] Signed-off-by: Aaron Tomlin Signed-off-by: Don Zickus Cc: David S. 
Miller Cc: Mateusz Guzik Cc: Oleg Nesterov Signed-off-by: Jan Moskyto Matejka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ Documentation/sysctl/kernel.txt | 17 ++++++++++++++++ include/linux/nmi.h | 1 + kernel/sysctl.c | 11 +++++++++++ kernel/watchdog.c | 39 +++++++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+) (limited to 'kernel/watchdog.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 884904975d0..c1b9aa8c5a5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [KNL] Should the soft-lockup detector generate panics. Format: + softlockup_all_cpu_backtrace= + [KNL] Should the soft-lockup detector generate + backtraces on all cpus. + Format: + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 708bb7f1b7e..c14374e7177 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -75,6 +75,7 @@ show up in /proc/sys/kernel: - shmall - shmmax [ sysv ipc ] - shmmni +- softlockup_all_cpu_backtrace - stop-a [ SPARC only ] - sysrq ==> Documentation/sysrq.txt - sysctl_writes_strict @@ -783,6 +784,22 @@ via the /proc/sys interface: ============================================================== +softlockup_all_cpu_backtrace: + +This value controls the soft lockup detector thread's behavior +when a soft lockup condition is detected as to whether or not +to gather further debug information. If enabled, each cpu will +be issued an NMI and instructed to capture stack trace. + +This feature is only applicable for architectures which support +NMI. + +0: do nothing. This is the default behavior. + +1: on detection capture more debug information. 
+ +============================================================== + tainted: Non-zero if the kernel has been tainted. Numeric values, which diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a17ab6398d7..447775ee2c4 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -57,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int watchdog_user_enabled; extern int watchdog_thresh; +extern int sysctl_softlockup_all_cpu_backtrace; struct ctl_table; extern int proc_dowatchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 075d1903138..75b22e22a72 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -860,6 +860,17 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_SMP + { + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* CONFIG_SMP */ { .procname = "nmi_watchdog", .data = &watchdog_user_enabled, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 30e482240da..c3319bd1b04 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -31,6 +31,12 @@ int watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; +#ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; +#else +#define sysctl_softlockup_all_cpu_backtrace 0 +#endif + static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; @@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif +static unsigned long soft_lockup_nmi_warn; /* boot commands */ /* @@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str) } __setup("nosoftlockup", nosoftlockup_setup); /* 
*/ +#ifdef CONFIG_SMP +static int __init softlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_softlockup_all_cpu_backtrace = + !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +#endif /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- @@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); struct pt_regs *regs = get_irq_regs(); int duration; + int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; /* kick the hardlockup detector */ watchdog_interrupt_count(); @@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; + if (softlockup_all_cpu_backtrace) { + /* Prevent multiple soft-lockup reports if one cpu is already + * engaged in dumping cpu back traces + */ + if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { + /* Someone else will report us. Let's give up */ + __this_cpu_write(soft_watchdog_warn, true); + return HRTIMER_RESTART; + } + } + printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); + if (softlockup_all_cpu_backtrace) { + /* Avoid generating two back traces for current + * given that one is already made above + */ + trigger_allbutself_cpu_backtrace(); + + clear_bit(0, &soft_lockup_nmi_warn); + /* Barrier to sync with other cpus */ + smp_mb__after_atomic(); + } + if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); -- cgit v1.2.3-70-g09d2