From 7a05c0f7bbae91d08b7d0acf016fdb42dbc912ae Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 23 Mar 2012 15:01:55 -0700 Subject: watchdog: make sure the watchdog thread gets CPU on loaded system If the system is loaded while hotplugging a CPU we might end up with a bogus hardlockup detection. This has been seen during LTP pounder test executed in parallel with hotplug test. The main problem is that watchdog_enable (called when a CPU is brought up) registers a perf event which periodically checks a per-cpu counter (hrtimer_interrupts), updated from a hrtimer callback, but the hrtimer is fired from the kernel thread. This means that while we already do check for the hard lockup the kernel thread might be sitting on the runqueue with zillions of tasks so there is nobody to update the value we rely on and so we KABOOM. Let's fix this by boosting the watchdog thread priority before we wake it up rather than when it's already running. This still doesn't handle a case where we have the same amount of high prio FIFO tasks but that doesn't seem to be common. The current implementation doesn't handle that case anyway so this is not worse at least. Unfortunately, we cannot start the perf counter from the watchdog thread because we could miss a real lockup, and also we cannot start the hrtimer from watchdog_enable because there is no way (at least I don't know any) to start a hrtimer from a different CPU. 
[dzickus@redhat.com: fix compile issue with param] Cc: Ingo Molnar Cc: Peter Zijlstra Reviewed-by: Mandeep Singh Baines Signed-off-by: Michal Hocko Signed-off-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/watchdog.c') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 14bc092fb12..203fc6e1a28 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -319,11 +319,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ static int watchdog(void *unused) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + struct sched_param param = { .sched_priority = 0 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - sched_setscheduler(current, SCHED_FIFO, &param); - /* initialize timestamp */ __touch_watchdog(); @@ -350,7 +348,6 @@ static int watchdog(void *unused) set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); - param.sched_priority = 0; sched_setscheduler(current, SCHED_NORMAL, &param); return 0; } @@ -439,6 +436,7 @@ static int watchdog_enable(int cpu) /* create the watchdog thread */ if (!p) { + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); if (IS_ERR(p)) { printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); @@ -450,6 +448,7 @@ static int watchdog_enable(int cpu) } goto out; } + sched_setscheduler(p, SCHED_FIFO, &param); kthread_bind(p, cpu); per_cpu(watchdog_touch_ts, cpu) = 0; per_cpu(softlockup_watchdog, cpu) = p; -- cgit v1.2.3-70-g09d2 From 4501980aae221ed8120dee3491f799ecd75187ad Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 23 Mar 2012 15:01:55 -0700 Subject: kernel/watchdog.c: convert to pr_foo() It fixes some 80-col wordwrappings and adds some consistency. 
Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'kernel/watchdog.c') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 203fc6e1a28..a01cb03b045 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -9,6 +9,8 @@ * to those contributors as well. */ +#define pr_fmt(fmt) "NMI watchdog: " fmt + #include #include #include @@ -373,18 +375,20 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); if (!IS_ERR(event)) { - printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); + pr_info("enabled, takes one hw-pmu counter.\n"); goto out_save; } /* vary the KERN level based on the returned errno */ if (PTR_ERR(event) == -EOPNOTSUPP) - printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); + pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); else if (PTR_ERR(event) == -ENOENT) - printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); + pr_warning("disabled (cpu%i): hardware events not enabled\n", + cpu); else - printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); + pr_err("disabled (cpu%i): unable to create perf event: %ld\n", + cpu, PTR_ERR(event)); return PTR_ERR(event); /* success path */ @@ -439,7 +443,7 @@ static int watchdog_enable(int cpu) struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); if (IS_ERR(p)) { - printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); + pr_err("softlockup watchdog for %i failed\n", cpu); if (!err) { /* if hardlockup hasn't already set this */ err = PTR_ERR(p); @@ -495,7 +499,7 @@ static void 
watchdog_enable_all_cpus(void) watchdog_enabled = 1; if (!watchdog_enabled) - printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); + pr_err("failed to be enabled on some cpus\n"); } -- cgit v1.2.3-70-g09d2 From b60f796c4ca72545327a069f12938360d833cce7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 23 Mar 2012 15:01:56 -0700 Subject: kernel/watchdog.c: add comment to watchdog() exit path Revelation from Peter. Cc: Peter Zijlstra Cc: Don Zickus Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/watchdog.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/watchdog.c') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a01cb03b045..df30ee08bdd 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -349,6 +349,10 @@ static int watchdog(void *unused) set_current_state(TASK_INTERRUPTIBLE); } + /* + * Drop the policy/priority elevation during thread exit to avoid a + * scheduling latency spike. + */ __set_current_state(TASK_RUNNING); sched_setscheduler(current, SCHED_NORMAL, &param); return 0; -- cgit v1.2.3-70-g09d2