From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f668ebf..37efe8fa530 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,6 +346,12 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_LOCKUP_DETECTOR +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3-70-g09d2 From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:45 -0400 Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick removal Just some code cleanup to make touch_softlockup clearer and remove the softlockup_tick function as it is no longer needed. Also remove the /proc softlockup_thres call as it has been changed to watchdog_thres. Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 16 +++------------- kernel/sysctl.c | 9 --------- kernel/timer.c | 1 - kernel/watchdog.c | 35 +++-------------------------------- 4 files changed, 6 insertions(+), 55 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 37efe8fa530..33f9b2ad0bb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -312,19 +312,15 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP -extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; #else -static inline void softlockup_tick(void) -{ -} static inline void touch_softlockup_watchdog(void) { } @@ -346,12 +342,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0f9adda85f9..999bc3fccf4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -817,15 +817,6 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, - { - .procname = "softlockup_thresh", - .data = &softlockup_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dosoftlockup_thresh, - .extra1 = &neg_one, - .extra2 = &sixty, - }, #endif #ifdef CONFIG_DETECT_HUNG_TASK { diff --git a/kernel/timer.c b/kernel/timer.c index aeb6a54f277..e8de5eb07a0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1225,7 +1225,6 @@ void run_local_timers(void) { hrtimer_run_queues(); raise_softirq(TIMER_SOFTIRQ); - softlockup_tick(); } /* diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6b7fad8497a..f1541b7e324 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -119,13 +119,12 @@ static void __touch_watchdog(void) __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); } -void touch_watchdog(void) +void touch_softlockup_watchdog(void) { __get_cpu_var(watchdog_touch_ts) = 0; } -EXPORT_SYMBOL(touch_watchdog); -void touch_all_watchdog(void) +void touch_all_softlockup_watchdogs(void) { int cpu; @@ -140,35 +139,16 @@ void touch_all_watchdog(void) void touch_nmi_watchdog(void) { - touch_watchdog(); + touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); -void touch_all_nmi_watchdog(void) -{ - touch_all_watchdog(); -} - -void touch_softlockup_watchdog(void) -{ - touch_watchdog(); -} - -void touch_all_softlockup_watchdogs(void) -{ - touch_all_watchdog(); -} - void touch_softlockup_watchdog_sync(void) { __raw_get_cpu_var(softlockup_touch_sync) = true; __raw_get_cpu_var(watchdog_touch_ts) = 0; } -void softlockup_tick(void) -{ -} - #ifdef CONFIG_PERF_EVENTS_NMI /* watchdog detector functions */ static int is_hardlockup(int cpu) @@ -522,15 +502,6 @@ int proc_dowatchdog_thresh(struct ctl_table *table, int write, { return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } - -/* stub functions */ -int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos); -} -/* end of stub functions */ #endif /* CONFIG_SYSCTL */ -- cgit v1.2.3-70-g09d2 From 19cc36c0f0457e5c6629ec24036fbbe8255c88ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 May 2010 02:30:49 +0200 Subject: lockup_detector: Fix forgotten config conversion Fix forgotten CONFIG_DETECT_SOFTLOCKUP -> CONFIG_LOCKUP_DETECTOR in sched.h Fixes: arch/x86/built-in.o: In function `touch_nmi_watchdog': (.text+0x1bd59): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `show_state_filter': (.text+0x10d01): undefined reference to `touch_all_softlockup_watchdogs' kernel/built-in.o: In function `sched_clock_idle_wakeup_event': (.text+0x362f9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `timekeeping_resume': timekeeping.c:(.text+0x38757): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_nohz_handler': tick-sched.c:(.text+0x3e5b9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_sched_timer': tick-sched.c:(.text+0x3e671): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_check_idle': (.text+0x3e90b): undefined reference to `touch_softlockup_watchdog' Signed-off-by: Frederic Weisbecker Cc: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33f9b2ad0bb..3958e0cd24f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -311,7 +311,7 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_LOCKUP_DETECTOR extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -- cgit v1.2.3-70-g09d2