From 7e49fcce1bdadd723ae6a0b3b324c4daced61563 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Jan 2009 19:01:40 -0500 Subject: trace, lockdep: manual preempt count adding for local_bh_disable Impact: fix to preempt trace triggering lockdep check_flag failure In local_bh_disable, the use of add_preempt_count causes the preempt tracer to start recording the time preemption is off. But because it already modified the preempt_count to show softirqs disabled, and before it called the lockdep code to handle this, it causes a state that lockdep can not handle. The preempt tracer will reset the ring buffer on start of a trace, and the ring buffer reset code does a spin_lock_irqsave. This calls into lockdep and lockdep will fail when it detects the invalid state of having softirqs disabled but the internal current->softirqs_enabled is still set. The fix is to manually add the SOFTIRQ_OFFSET to preempt count and call the preempt tracer code outside the lockdep critical area. Thanks to Peter Zijlstra for suggesting this solution. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/softirq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/softirq.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8..6edfc2c11d9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip) WARN_ON_ONCE(in_irq()); raw_local_irq_save(flags); - add_preempt_count(SOFTIRQ_OFFSET); + /* + * The preempt tracer hooks into add_preempt_count and will break + * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET + * is set and before current->softirq_enabled is cleared. + * We must manually increment preempt_count here and manually + * call the trace_preempt_off later. 
+ */ + preempt_count() += SOFTIRQ_OFFSET; /* * Were softirqs turned off above: */ if (softirq_count() == SOFTIRQ_OFFSET) trace_softirqs_off(ip); raw_local_irq_restore(flags); + + if (preempt_count() == SOFTIRQ_OFFSET) + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } #else /* !CONFIG_TRACE_IRQFLAGS */ static inline void __local_bh_disable(unsigned long ip) -- cgit v1.2.3-70-g09d2 From 5d592b44b29a1d73e13d5c9e3426eed843bdc359 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 12 Mar 2009 14:33:36 -0400 Subject: tracing: tracepoints for softirq entry/exit - add softirq-to-name array Create a 'softirq_to_name' array, which is indexed by softirq #, so that we can easily convert between the softirq index # and its name, in order to get more meaningful output messages. LKML-Reference: <20090312183336.GB3352@redhat.com> Signed-off-by: Jason Baron Signed-off-by: Steven Rostedt --- include/linux/interrupt.h | 5 +++++ kernel/softirq.c | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel/softirq.c') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 472f11765f6..9b7e9d74347 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -258,6 +258,11 @@ enum NR_SOFTIRQS }; +/* map softirq index to softirq name. update 'softirq_to_name' in + * kernel/softirq.c when adding a new softirq. + */ +extern char *softirq_to_name[NR_SOFTIRQS]; + /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. 
KAO */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 7571bcb71be..9f90fdc039f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -53,6 +53,12 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +char *softirq_to_name[NR_SOFTIRQS] = { + "HI_SOFTIRQ", "TIMER_SOFTIRQ", "NET_TX_SOFTIRQ", "NET_RX_SOFTIRQ", + "BLOCK_SOFTIRQ", "TASKLET_SOFTIRQ", "SCHED_SOFTIRQ", "HRTIMER_SOFTIRQ", + "RCU_SOFTIRQ" +}; + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency @@ -209,9 +215,10 @@ restart: h->action(h); if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %td %p" + printk(KERN_ERR "huh, entered softirq %td %s %p" "with preempt_count %08x," " exited with %08x?\n", h - softirq_vec, + softirq_to_name[h - softirq_vec], h->action, prev_count, preempt_count()); preempt_count() = prev_count; } -- cgit v1.2.3-70-g09d2 From 39842323ceb368d2ea36ab7696aedbe296e13b61 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 12 Mar 2009 14:36:03 -0400 Subject: tracing: tracepoints for softirq entry/exit - tracepoints Introduce softirq entry/exit tracepoints. These are useful for augmenting existing tracers, and to figure out softirq frequencies and timings. 
[ s/irq_softirq_/softirq_/ for trace point names and Fixed printf format in TRACE_FORMAT macro - Steven Rostedt ] LKML-Reference: <20090312183603.GC3352@redhat.com> Signed-off-by: Jason Baron Signed-off-by: Steven Rostedt --- include/trace/irq_event_types.h | 12 ++++++++++++ kernel/softirq.c | 7 ++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'kernel/softirq.c') diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h index 214bb928fe9..85964ebd47e 100644 --- a/include/trace/irq_event_types.h +++ b/include/trace/irq_event_types.h @@ -40,4 +40,16 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + #undef TRACE_SYSTEM diff --git a/kernel/softirq.c b/kernel/softirq.c index 9f90fdc039f..a5e81231ca7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* @@ -186,6 +187,9 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 +DEFINE_TRACE(softirq_entry); +DEFINE_TRACE(softirq_exit); + asmlinkage void __do_softirq(void) { struct softirq_action *h; @@ -212,8 +216,9 @@ restart: if (pending & 1) { int prev_count = preempt_count(); + trace_softirq_entry(h, softirq_vec); h->action(h); - + trace_softirq_exit(h, softirq_vec); if (unlikely(prev_count != preempt_count())) { printk(KERN_ERR "huh, entered softirq %td %s %p" "with preempt_count %08x," -- cgit v1.2.3-70-g09d2 From 899039e8746bb9a09b6487ddb8ab2275ce9d0256 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 13 Mar 2009 00:43:33 -0400 Subject: softirq: no need to have SOFTIRQ 
in softirq name Impact: clean up It is redundant to have 'SOFTIRQ' in the softirq names. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/softirq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/softirq.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index a5e81231ca7..65ff3e3961b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -55,9 +55,8 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { - "HI_SOFTIRQ", "TIMER_SOFTIRQ", "NET_TX_SOFTIRQ", "NET_RX_SOFTIRQ", - "BLOCK_SOFTIRQ", "TASKLET_SOFTIRQ", "SCHED_SOFTIRQ", "HRTIMER_SOFTIRQ", - "RCU_SOFTIRQ" + "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", + "TASKLET", "SCHED", "HRTIMER", "RCU" }; /* -- cgit v1.2.3-70-g09d2 From 7f1e2ca9f04b02794597f60e7b1d43f0a1317939 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:27 +0100 Subject: hrtimer: fix rq->lock inversion (again) It appears I inadvertently introduced rq->lock recursion to the hrtimer_start() path when I delegated running already expired timers to softirq context. This patch fixes it by introducing a __hrtimer_start_range_ns() method that will not use raise_softirq_irqoff() but __raise_softirq_irqoff() which avoids the wakeup. It then also changes schedule() to check for pending softirqs and do the wakeup then, I'm not quite sure I like this last bit, nor am I convinced it's really needed.
Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: paulus@samba.org LKML-Reference: <20090313112301.096138802@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 5 +++++ include/linux/interrupt.h | 1 + kernel/hrtimer.c | 55 +++++++++++++++++++++++++++++------------------ kernel/sched.c | 14 +++++++++--- kernel/softirq.c | 2 +- 5 files changed, 52 insertions(+), 25 deletions(-) (limited to 'kernel/softirq.c') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bd37078c2d7..0d2f7c8a33d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -336,6 +336,11 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long range_ns, const enum hrtimer_mode mode); +extern int +__hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, + const enum hrtimer_mode mode, int wakeup); + extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c68bffd182b..4528bf70866 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -294,6 +294,7 @@ extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +extern void wakeup_softirqd(void); /* This is the worklist that queues up per-cpu softirq work. * diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f394d2a42ca..cb8a15c1958 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -651,14 +651,20 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) * and expiry check is done in the hrtimer_interrupt or in the softirq. 
*/ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - spin_unlock(&base->cpu_base->lock); - raise_softirq_irqoff(HRTIMER_SOFTIRQ); - spin_lock(&base->cpu_base->lock); + if (wakeup) { + spin_unlock(&base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + spin_lock(&base->cpu_base->lock); + } else + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + return 1; } + return 0; } @@ -703,7 +709,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + int wakeup) { return 0; } @@ -886,20 +893,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) return 0; } -/** - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU - * @timer: the timer to be added - * @tim: expiry time - * @delta_ns: "slack" range for the timer - * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) - * - * Returns: - * 0 on success - * 1 when the timer was active - */ -int -hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, - const enum hrtimer_mode mode) +int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode, + int wakeup) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -940,12 +936,29 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n * XXX send_remote_softirq() ? 
*/ if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) - hrtimer_enqueue_reprogram(timer, new_base); + hrtimer_enqueue_reprogram(timer, new_base, wakeup); unlock_hrtimer_base(timer, &flags); return ret; } + +/** + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, const enum hrtimer_mode mode) +{ + return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1); +} EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); /** @@ -961,7 +974,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { - return hrtimer_start_range_ns(timer, tim, 0, mode); + return __hrtimer_start_range_ns(timer, tim, 0, mode, 1); } EXPORT_SYMBOL_GPL(hrtimer_start); diff --git a/kernel/sched.c b/kernel/sched.c index 196d48babbe..63256e3ede2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) spin_lock(&rt_b->rt_runtime_lock); for (;;) { + unsigned long delta; + ktime_t soft, hard; + if (hrtimer_active(&rt_b->rt_period_timer)) break; now = hrtimer_cb_get_time(&rt_b->rt_period_timer); hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); - hrtimer_start_expires(&rt_b->rt_period_timer, - HRTIMER_MODE_ABS); + + soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); + hard = hrtimer_get_expires(&rt_b->rt_period_timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, + HRTIMER_MODE_ABS, 0); } spin_unlock(&rt_b->rt_runtime_lock); } @@ -1146,7 +1153,8 @@ static __init void init_hrtick(void) */ static void hrtick_start(struct rq 
*rq, u64 delay) { - hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); + __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, + HRTIMER_MODE_REL, 0); } static inline void init_hrtick(void) diff --git a/kernel/softirq.c b/kernel/softirq.c index 48775160430..accc85197c4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static inline void wakeup_softirqd(void) +void wakeup_softirqd(void) { /* Interrupts are disabled: no need to stop preemption */ struct task_struct *tsk = __get_cpu_var(ksoftirqd); -- cgit v1.2.3-70-g09d2 From 79d381c9f2354b594dcab9b04dfcc0debf7294fe Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 16 Apr 2009 19:30:18 -0400 Subject: kernel/softirq.c: fix sparse warning Fix sparse warning in kernel/softirq.c. warning: do-while statement is not a compound statement Signed-off-by: H Hartley Sweeten LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/softirq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/softirq.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index 2fecefacdc5..b525dd34851 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -472,9 +472,9 @@ void tasklet_kill(struct tasklet_struct *t) printk("Attempt to kill tasklet from interrupt\n"); while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do + do { yield(); - while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); clear_bit(TASKLET_STATE_SCHED, &t->state); -- cgit v1.2.3-70-g09d2