From c12172c0251761c54260376eb29a5f6547495580 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Jan 2009 20:30:06 -0800 Subject: rcu: fix rcutree grace-period-latency bug on small systems Impact: fix delays during bootup Kudos to Andi Kleen for finding a grace-period-latency problem! The problem was that the special-case code for small machines never updated the ->signaled field to indicate that grace-period initialization had completed, which prevented force_quiescent_state() from ever expediting grace periods. This problem resulted in grace periods extending for more than 20 seconds. Not subtle. I introduced this bug during my inspection process when I fixed a race between grace-period initialization and force_quiescent_state() execution. The following patch properly updates the ->signaled field for the "small"-system case (no more than 32 CPUs for 32-bit kernels and no more than 64 CPUs for 64-bit kernels). Reported-by: Andi Kleen Tested-by: Andi Kleen Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a342b032112..88d921c5c44 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -572,6 +572,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { rnp->qsmask = rnp->qsmaskinit; + rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ spin_unlock_irqrestore(&rnp->lock, flags); return; } -- cgit v1.2.3-70-g09d2 From 90a4d2c0106bb690f0b6af3d506febc35c658aa7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Jan 2009 11:41:11 -0800 Subject: rcu: make treercu safe for suspend and resume Impact: fix kernel warnings [and potential crash] during suspend+resume Kudos to both Dhaval Giani and Jens Axboe for finding a bug in treercu that causes warnings after suspend-resume cycles in Dhaval's case and during stress tests in Jens's case. It would also probably cause failures if heavily stressed. The solution, ironically enough, is to revert to rcupreempt's code for initializing the dynticks state. And the patch even results in smaller code -- so what was I thinking??? This is 2.6.29 material, given that people really do suspend and resume Linux these days. ;-) Reported-by: Dhaval Giani Reported-by: Jens Axboe Tested-by: Dhaval Giani Tested-by: Jens Axboe Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 88d921c5c44..f2d8638e6c6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -79,7 +79,10 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); #ifdef CONFIG_NO_HZ -DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks); +DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { + .dynticks_nesting = 1, + .dynticks = 1, +}; #endif /* #ifdef CONFIG_NO_HZ */ static int blimit = 10; /* Maximum callbacks per softirq. */ @@ -1380,13 +1383,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) static void __cpuinit rcu_online_cpu(int cpu) { -#ifdef CONFIG_NO_HZ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dynticks_nesting = 1; - rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */ - rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1; -#endif /* #ifdef CONFIG_NO_HZ */ rcu_init_percpu_data(cpu, &rcu_state); rcu_init_percpu_data(cpu, &rcu_bh_state); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -- cgit v1.2.3-70-g09d2