From 4e857c58efeb99393cba5a5d0d8ec7117183137c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 17 Mar 2014 18:06:10 +0100 Subject: arch: Mass conversion of smp_mb__*() Mostly scripted conversion of the smp_mb__* barriers. Signed-off-by: Peter Zijlstra Acked-by: Paul E. McKenney Link: http://lkml.kernel.org/n/tip-55dhyhocezdw1dg7u19hmh1u@git.kernel.org Cc: Linus Torvalds Cc: linux-arch@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/rcu/tree.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0c47e300210..88b4a1dcb58 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -387,9 +387,9 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, } rcu_prepare_for_idle(smp_processor_id()); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ - smp_mb__before_atomic_inc(); /* See above. */ + smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); - smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ + smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); /* @@ -507,10 +507,10 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { - smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ + smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic_inc(); /* See above. */ + smp_mb__after_atomic(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); @@ -635,10 +635,10 @@ void rcu_nmi_enter(void) (atomic_read(&rdtp->dynticks) & 0x1)) return; rdtp->dynticks_nmi_nesting++; - smp_mb__before_atomic_inc(); /* Force delay from prior write. */ + smp_mb__before_atomic(); /* Force delay from prior write. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic_inc(); /* See above. */ + smp_mb__after_atomic(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); } @@ -657,9 +657,9 @@ void rcu_nmi_exit(void) --rdtp->dynticks_nmi_nesting != 0) return; /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ - smp_mb__before_atomic_inc(); /* See above. */ + smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); - smp_mb__after_atomic_inc(); /* Force delay to next write. */ + smp_mb__after_atomic(); /* Force delay to next write. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } @@ -2790,7 +2790,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_workdone1); return; } @@ -2808,7 +2808,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_workdone2); return; } @@ -2837,7 +2837,7 @@ void synchronize_sched_expedited(void) s = atomic_long_read(&rsp->expedited_done); if (ULONG_CMP_GE((ulong)s, (ulong)snap)) { /* ensure test happens before caller kfree */ - smp_mb__before_atomic_inc(); /* ^^^ */ + smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(&rsp->expedited_done_lost); break; } -- cgit v1.2.3-70-g09d2 From 91dc95427a0d30ac2c58d6e943c7f40a3f25d908 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Feb 2014 09:47:13 -0800 Subject: rcu: Protect ->gp_flags accesses with ACCESS_ONCE() A number of ->gp_flags accesses don't have ACCESS_ONCE(), but all of the can race against other loads or stores. This commit therefore applies ACCESS_ONCE() to the unprotected ->gp_flags accesses. Reported-by: Alexey Roytman Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0c47e300210..2c53ac924ca 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1403,12 +1403,12 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - if (rsp->gp_flags == 0) { + if (!ACCESS_ONCE(rsp->gp_flags)) { /* Spurious wakeup, tell caller to go back to sleep. */ raw_spin_unlock_irq(&rnp->lock); return 0; } - rsp->gp_flags = 0; /* Clear all flags: New grace period. */ + ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */ if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { /* @@ -1501,7 +1501,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - rsp->gp_flags &= ~RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; raw_spin_unlock_irq(&rnp->lock); } return fqs_state; @@ -1566,7 +1566,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rdp = this_cpu_ptr(rsp->rda); rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ if (cpu_needs_another_gp(rsp, rdp)) { - rsp->gp_flags = RCU_GP_FLAG_INIT; + ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("newreq")); @@ -1695,7 +1695,7 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, */ return; } - rsp->gp_flags = RCU_GP_FLAG_INIT; + ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("newreq")); @@ -2320,7 +2320,7 @@ static void force_quiescent_state(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp_old->lock, flags); return; /* Someone beat us to it. */ } - rsp->gp_flags |= RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } -- cgit v1.2.3-70-g09d2 From 83ebe63ead0fe60e4b548730800cb68293ce098b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Mar 2014 11:09:10 -0800 Subject: rcu: Print negatives for stall-warning counter wraparound The print_other_cpu_stall() and print_cpu_stall() functions print grace-period numbers using an unsigned format, which means that the number one less than zero is a very large number. This commit therefore causes these numbers to be printed with a signed format in order to improve readability of the RCU CPU stall-warning output. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2c53ac924ca..b33c29a99df 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -932,9 +932,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; - pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", + pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), - rsp->gpnum, rsp->completed, totqlen); + (long)rsp->gpnum, (long)rsp->completed, totqlen); if (ndetected == 0) pr_err("INFO: Stall ended before state dump start\n"); else if (!trigger_all_cpu_backtrace()) @@ -971,8 +971,9 @@ static void print_cpu_stall(struct rcu_state *rsp) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; - pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", - jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen); + pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", + jiffies - rsp->gp_start, + (long)rsp->gpnum, (long)rsp->completed, totqlen); if (!trigger_all_cpu_backtrace()) dump_stack(); -- cgit v1.2.3-70-g09d2 From 365187fbc04fd55766bf6a94e37e558505bf480a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Mar 2014 10:55:52 -0700 Subject: rcu: Update cpu_needs_another_gp() for futures from non-NOCB CPUs In the old days, the only source of requests for future grace periods was NOCB CPUs. This has changed: CPUs routinely post requests for future grace periods in order to promote power efficiency and reduce OS jitter with minimal impact on grace-period latency. This commit therefore updates cpu_needs_another_gp() to invoke rcu_future_needs_gp() instead of rcu_nocb_needs_gp(). The latter is no longer used, so is now removed. This commit also adds tracing for the irq_work_queue() wakeup case. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 39 +++++++++++++++++++++++++++++---------- kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 18 ------------------ 3 files changed, 29 insertions(+), 29 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b33c29a99df..b4688993e95 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -323,6 +323,28 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) rdp->nxttail[RCU_DONE_TAIL] != NULL; } +/* + * Return the root node of the specified rcu_state structure. + */ +static struct rcu_node *rcu_get_root(struct rcu_state *rsp) +{ + return &rsp->node[0]; +} + +/* + * Is there any need for future grace periods? + * Interrupts must be disabled. If the caller does not hold the root + * rnp_node structure's ->lock, the results are advisory only. + */ +static int rcu_future_needs_gp(struct rcu_state *rsp) +{ + struct rcu_node *rnp = rcu_get_root(rsp); + int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1; + int *fp = &rnp->need_future_gp[idx]; + + return ACCESS_ONCE(*fp); +} + /* * Does the current CPU require a not-yet-started grace period? * The caller must have disabled interrupts to prevent races with @@ -335,7 +357,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) if (rcu_gp_in_progress(rsp)) return 0; /* No, a grace period is already in progress. */ - if (rcu_nocb_needs_gp(rsp)) + if (rcu_future_needs_gp(rsp)) return 1; /* Yes, a no-CBs CPU needs one. */ if (!rdp->nxttail[RCU_NEXT_TAIL]) return 0; /* No, this is a no-CBs (or offline) CPU. */ @@ -349,14 +371,6 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) return 0; /* No grace period needed. */ } -/* - * Return the root node of the specified rcu_state structure. - */ -static struct rcu_node *rcu_get_root(struct rcu_state *rsp) -{ - return &rsp->node[0]; -} - /* * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state * @@ -1672,6 +1686,8 @@ static void rsp_wakeup(struct irq_work *work) /* Wake up rcu_gp_kthread() to start the grace period. */ wake_up(&rsp->gp_wq); + trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + "Workqueuewoken"); } /* @@ -1706,8 +1722,11 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, * the wakeup to interrupt context. And don't bother waking * up the running kthread. */ - if (current != rsp->gp_kthread) + if (current != rsp->gp_kthread) { + trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + "Workqueuewake"); irq_work_queue(&rsp->wakeup_work); + } } /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 75dc3c39a02..7b572c5c65e 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -547,7 +547,6 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); static void print_cpu_stall_info_end(void); static void zero_cpu_stall_ticks(struct rcu_data *rdp); static void increment_cpu_stall_ticks(void); -static int rcu_nocb_needs_gp(struct rcu_state *rsp); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f9c9057239a..f60dd6ea833 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2067,19 +2067,6 @@ static int __init parse_rcu_nocb_poll(char *arg) } early_param("rcu_nocb_poll", parse_rcu_nocb_poll); -/* - * Do any no-CBs CPUs need another grace period? - * - * Interrupts must be disabled. If the caller does not hold the root - * rnp_node structure's ->lock, the results are advisory only. - */ -static int rcu_nocb_needs_gp(struct rcu_state *rsp) -{ - struct rcu_node *rnp = rcu_get_root(rsp); - - return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; -} - /* * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended * grace period. @@ -2402,11 +2389,6 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static int rcu_nocb_needs_gp(struct rcu_state *rsp) -{ - return 0; -} - static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { } -- cgit v1.2.3-70-g09d2 From 9b67122ae3da3018c966148233739116ed89502a Mon Sep 17 00:00:00 2001 From: Iulia Manda Date: Tue, 11 Mar 2014 13:18:22 +0200 Subject: rcu: Remove unused rcu_data structure field The ->preemptible field in rcu_data is only initialized in the function rcu_init_percpu_data(), and never used. This commit therefore removes this field. Signed-off-by: Iulia Manda Reviewed-by: Josh Triplett Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 6 ++---- kernel/rcu/tree.h | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b4688993e95..2cc39c78108 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3180,7 +3180,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) * that this CPU cannot possibly have any RCU callbacks in flight yet. */ static void -rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) +rcu_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; unsigned long mask; @@ -3193,7 +3193,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->beenonline = 1; /* We have now been online. */ - rdp->preemptible = preemptible; rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -3237,8 +3236,7 @@ static void rcu_prepare_cpu(int cpu) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - rcu_init_percpu_data(cpu, rsp, - strcmp(rsp->name, "rcu_preempt") == 0); + rcu_init_percpu_data(cpu, rsp); } /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 7b572c5c65e..13766ad2f4e 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -252,7 +252,6 @@ struct rcu_data { bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ - bool preemptible; /* Preemptible RCU? */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ #ifdef CONFIG_RCU_CPU_STALL_INFO -- cgit v1.2.3-70-g09d2 From 4fc5b75537d4f56577ad00355b4cd09627deb3c3 Mon Sep 17 00:00:00 2001 From: Iulia Manda Date: Tue, 11 Mar 2014 15:22:28 +0200 Subject: rcu: Protect uses of jiffies_stall field with ACCESS_ONCE() Some of the uses of the rcu_state structure's ->jiffies_stall field do not use ACCESS_ONCE(), despite there being unprotected accesses. This commit therefore uses the ACCESS_ONCE() macro to protect this field. Signed-off-by: Iulia Manda Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2cc39c78108..c624415f838 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -865,7 +865,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) rsp->gp_start = j; smp_wmb(); /* Record start time before stall time. */ j1 = rcu_jiffies_till_stall_check(); - rsp->jiffies_stall = j + j1; + ACCESS_ONCE(rsp->jiffies_stall) = j + j1; rsp->jiffies_resched = j + j1 / 2; } @@ -904,12 +904,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* Only let one CPU complain about others per time interval. */ raw_spin_lock_irqsave(&rnp->lock, flags); - delta = jiffies - rsp->jiffies_stall; + delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -992,8 +992,8 @@ static void print_cpu_stall(struct rcu_state *rsp) dump_stack(); raw_spin_lock_irqsave(&rnp->lock, flags); - if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) - rsp->jiffies_stall = jiffies + + if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall))) + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -1077,7 +1077,7 @@ void rcu_cpu_stall_reset(void) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - rsp->jiffies_stall = jiffies + ULONG_MAX / 2; + ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2; } /* -- cgit v1.2.3-70-g09d2 From 48a7639ce80cf279834d0d44865e49ecd714f37d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Mar 2014 13:02:16 -0700 Subject: rcu: Make callers awaken grace-period kthread The rcu_start_gp_advanced() function currently uses irq_work_queue() to defer wakeups of the RCU grace-period kthread. This deferring is necessary to avoid RCU-scheduler deadlocks involving the rcu_node structure's lock, meaning that RCU cannot call any of the scheduler's wake-up functions while holding one of these locks. Unfortunately, the second and subsequent calls to irq_work_queue() are ignored, and the first call will be ignored (aside from queuing the work item) if the scheduler-clock tick is turned off. This is OK for many uses, especially those where irq_work_queue() is called from an interrupt or softirq handler, because in those cases the scheduler-clock-tick state will be re-evaluated, which will turn the scheduler-clock tick back on. On the next tick, any deferred work will then be processed. However, this strategy does not always work for RCU, which can be invoked at process level from idle CPUs. In this case, the tick might never be turned back on, indefinitely defering a grace-period start request. Note that the RCU CPU stall detector cannot see this condition, because there is no RCU grace period in progress. Therefore, we can (and do!) see long tens-of-seconds stalls in grace-period handling. In theory, we could see a full grace-period hang, but rcutorture testing to date has seen only the tens-of-seconds stalls. Event tracing demonstrates that irq_work_queue() is being called repeatedly to no effect during these stalls: The "newreq" event appears repeatedly from a task that is not one of the grace-period kthreads. In theory, irq_work_queue() might be fixed to avoid this sort of issue, but RCU's requirements are unusual and it is quite straightforward to pass wake-up responsibility up through RCU's call chain, so that the wakeup happens when the offending locks are released. This commit therefore makes this change. The rcu_start_gp_advanced(), rcu_start_future_gp(), rcu_accelerate_cbs(), rcu_advance_cbs(), __note_gp_changes(), and rcu_start_gp() functions now return a boolean which indicates when a wake-up is needed. A new rcu_gp_kthread_wake() does the wakeup when it is necessary and safe to do so: No self-wakes, no wake-ups if the ->gp_flags field indicates there is no need (as in someone else did the wake-up before we got around to it), and no wake-ups before the grace-period kthread has been created. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Frederic Weisbecker Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 137 +++++++++++++++++++++++++++++------------------ kernel/rcu/tree.h | 1 - kernel/rcu/tree_plugin.h | 10 +++- 3 files changed, 94 insertions(+), 54 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c624415f838..fca911b6b29 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -243,7 +243,7 @@ static ulong jiffies_till_next_fqs = ULONG_MAX; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); -static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp, bool *isidle, @@ -1138,15 +1138,18 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, /* * Start some future grace period, as needed to handle newly arrived * callbacks. The required future grace periods are recorded in each - * rcu_node structure's ->need_future_gp field. + * rcu_node structure's ->need_future_gp field. Returns true if there + * is reason to awaken the grace-period kthread. * * The caller must hold the specified rcu_node structure's ->lock. */ -static unsigned long __maybe_unused -rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) +static bool __maybe_unused +rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long *c_out) { unsigned long c; int i; + bool ret = false; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); /* @@ -1157,7 +1160,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf")); if (rnp->need_future_gp[c & 0x1]) { trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf")); - return c; + goto out; } /* @@ -1171,7 +1174,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) { rnp->need_future_gp[c & 0x1]++; trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); - return c; + goto out; } /* @@ -1212,12 +1215,15 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp) trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot")); } else { trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot")); - rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); + ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp); } unlock_out: if (rnp != rnp_root) raw_spin_unlock(&rnp_root->lock); - return c; +out: + if (c_out != NULL) + *c_out = c; + return ret; } /* @@ -1240,6 +1246,22 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) return needmore; } +/* + * Awaken the grace-period kthread for the specified flavor of RCU. + * Don't do a self-awaken, and don't bother awakening when there is + * nothing for the grace-period kthread to do (as in several CPUs + * raced to awaken, and we lost), and finally don't try to awaken + * a kthread that has not yet been created. + */ +static void rcu_gp_kthread_wake(struct rcu_state *rsp) +{ + if (current == rsp->gp_kthread || + !ACCESS_ONCE(rsp->gp_flags) || + !rsp->gp_kthread) + return; + wake_up(&rsp->gp_wq); +} + /* * If there is room, assign a ->completed number to any callbacks on * this CPU that have not already been assigned. Also accelerate any @@ -1247,19 +1269,21 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) * since proven to be too conservative, which can happen if callbacks get * assigned a ->completed number while RCU is idle, but with reference to * a non-root rcu_node structure. This function is idempotent, so it does - * not hurt to call it repeatedly. + * not hurt to call it repeatedly. Returns an flag saying that we should + * awaken the RCU grace-period kthread. * * The caller must hold rnp->lock with interrupts disabled. */ -static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { unsigned long c; int i; + bool ret; /* If the CPU has no callbacks, nothing to do. */ if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) - return; + return false; /* * Starting from the sublist containing the callbacks most @@ -1288,7 +1312,7 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, * be grouped into. */ if (++i >= RCU_NEXT_TAIL) - return; + return false; /* * Assign all subsequent callbacks' ->completed number to the next @@ -1300,13 +1324,14 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, rdp->nxtcompleted[i] = c; } /* Record any needed additional grace periods. */ - rcu_start_future_gp(rnp, rdp); + ret = rcu_start_future_gp(rnp, rdp, NULL); /* Trace depending on how much we were able to accelerate. */ if (!*rdp->nxttail[RCU_WAIT_TAIL]) trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); else trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); + return ret; } /* @@ -1315,17 +1340,18 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL * sublist. This function is idempotent, so it does not hurt to * invoke it repeatedly. As long as it is not invoked -too- often... + * Returns true if the RCU grace-period kthread needs to be awakened. * * The caller must hold rnp->lock with interrupts disabled. */ -static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, +static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { int i, j; /* If the CPU has no callbacks, nothing to do. */ if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) - return; + return false; /* * Find all callbacks whose ->completed numbers indicate that they @@ -1349,26 +1375,30 @@ static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, } /* Classify any remaining callbacks. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + return rcu_accelerate_cbs(rsp, rnp, rdp); } /* * Update CPU-local rcu_data state to record the beginnings and ends of * grace periods. The caller must hold the ->lock of the leaf rcu_node * structure corresponding to the current CPU, and must have irqs disabled. + * Returns true if the grace-period kthread needs to be awakened. */ -static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) { + bool ret; + /* Handle the ends of any preceding grace periods first. */ if (rdp->completed == rnp->completed) { /* No grace period end, so just accelerate recent callbacks. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + ret = rcu_accelerate_cbs(rsp, rnp, rdp); } else { /* Advance callbacks. */ - rcu_advance_cbs(rsp, rnp, rdp); + ret = rcu_advance_cbs(rsp, rnp, rdp); /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; @@ -1387,11 +1417,13 @@ static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struc rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); } + return ret; } static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; + bool needwake; struct rcu_node *rnp; local_irq_save(flags); @@ -1403,8 +1435,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) return; } smp_mb__after_unlock_lock(); - __note_gp_changes(rsp, rnp, rdp); + needwake = __note_gp_changes(rsp, rnp, rdp); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rsp); } /* @@ -1468,7 +1502,7 @@ static int rcu_gp_init(struct rcu_state *rsp) WARN_ON_ONCE(rnp->completed != rsp->completed); ACCESS_ONCE(rnp->completed) = rsp->completed; if (rnp == rdp->mynode) - __note_gp_changes(rsp, rnp, rdp); + (void)__note_gp_changes(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); trace_rcu_grace_period_init(rsp->name, rnp->gpnum, rnp->level, rnp->grplo, @@ -1528,6 +1562,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) static void rcu_gp_cleanup(struct rcu_state *rsp) { unsigned long gp_duration; + bool needgp = false; int nocb = 0; struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); @@ -1563,7 +1598,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) ACCESS_ONCE(rnp->completed) = rsp->gpnum; rdp = this_cpu_ptr(rsp->rda); if (rnp == rdp->mynode) - __note_gp_changes(rsp, rnp, rdp); + needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; /* smp_mb() provided by prior unlock-lock pair. */ nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); @@ -1579,8 +1614,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); - rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ - if (cpu_needs_another_gp(rsp, rdp)) { + /* Advance CBs to reduce false positives below. */ + needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; + if (needgp || cpu_needs_another_gp(rsp, rdp)) { ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1680,16 +1716,6 @@ static int __noreturn rcu_gp_kthread(void *arg) } } -static void rsp_wakeup(struct irq_work *work) -{ - struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work); - - /* Wake up rcu_gp_kthread() to start the grace period. */ - wake_up(&rsp->gp_wq); - trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), - "Workqueuewoken"); -} - /* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold @@ -1698,8 +1724,10 @@ static void rsp_wakeup(struct irq_work *work) * Note that it is legal for a dying CPU (which is marked as offline) to * invoke this function. This can happen when the dying CPU reports its * quiescent state. + * + * Returns true if the grace-period kthread must be awakened. */ -static void +static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { @@ -1710,7 +1738,7 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, * or a grace period is already in progress. * Either way, don't start a new grace period. */ - return; + return false; } ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1719,14 +1747,9 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, /* * We can't do wakeups while holding the rnp->lock, as that * could cause possible deadlocks with the rq->lock. Defer - * the wakeup to interrupt context. And don't bother waking - * up the running kthread. + * the wakeup to our caller. */ - if (current != rsp->gp_kthread) { - trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), - "Workqueuewake"); - irq_work_queue(&rsp->wakeup_work); - } + return true; } /* @@ -1735,12 +1758,14 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, * is invoked indirectly from rcu_advance_cbs(), which would result in * endless recursion -- or would do so if it wasn't for the self-deadlock * that is encountered beforehand. + * + * Returns true if the grace-period kthread needs to be awakened. */ -static void -rcu_start_gp(struct rcu_state *rsp) +static bool rcu_start_gp(struct rcu_state *rsp) { struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); + bool ret = false; /* * If there is no grace period in progress right now, any @@ -1750,8 +1775,9 @@ rcu_start_gp(struct rcu_state *rsp) * resulting in pointless grace periods. So, advance callbacks * then start the grace period! */ - rcu_advance_cbs(rsp, rnp, rdp); - rcu_start_gp_advanced(rsp, rnp, rdp); + ret = rcu_advance_cbs(rsp, rnp, rdp) || ret; + ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret; + return ret; } /* @@ -1840,6 +1866,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; unsigned long mask; + bool needwake; struct rcu_node *rnp; rnp = rdp->mynode; @@ -1868,9 +1895,11 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. */ - rcu_accelerate_cbs(rsp, rnp, rdp); + needwake = rcu_accelerate_cbs(rsp, rnp, rdp); rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ + if (needwake) + rcu_gp_kthread_wake(rsp); } } @@ -2354,6 +2383,7 @@ static void __rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; + bool needwake; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -2365,8 +2395,10 @@ __rcu_process_callbacks(struct rcu_state *rsp) local_irq_save(flags); if (cpu_needs_another_gp(rsp, rdp)) { raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ - rcu_start_gp(rsp); + needwake = rcu_start_gp(rsp); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rsp); } else { local_irq_restore(flags); } @@ -2424,6 +2456,8 @@ static void invoke_rcu_core(void) static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, struct rcu_head *head, unsigned long flags) { + bool needwake; + /* * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. @@ -2453,8 +2487,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, raw_spin_lock(&rnp_root->lock); smp_mb__after_unlock_lock(); - rcu_start_gp(rsp); + needwake = rcu_start_gp(rsp); raw_spin_unlock(&rnp_root->lock); + if (needwake) + rcu_gp_kthread_wake(rsp); } else { /* Give the grace period a kick. */ rdp->blimit = LONG_MAX; @@ -3440,7 +3476,6 @@ static void __init rcu_init_one(struct rcu_state *rsp, rsp->rda = rda; init_waitqueue_head(&rsp->gp_wq); - init_irq_work(&rsp->wakeup_work, rsp_wakeup); rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 13766ad2f4e..cdbb392d5b4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -461,7 +461,6 @@ struct rcu_state { const char *name; /* Name of structure. */ char abbr; /* Abbreviated name. */ struct list_head flavors; /* List of RCU flavors. */ - struct irq_work wakeup_work; /* Postponed wakeups */ }; /* Values for rcu_state structure's gp_flags field. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f60dd6ea833..0cb0816036c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1744,6 +1744,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) static void rcu_prepare_for_idle(int cpu) { #ifndef CONFIG_RCU_NOCB_CPU_ALL + bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct rcu_node *rnp; @@ -1792,8 +1793,10 @@ static void rcu_prepare_for_idle(int cpu) rnp = rdp->mynode; raw_spin_lock(&rnp->lock); /* irqs already disabled. */ smp_mb__after_unlock_lock(); - rcu_accelerate_cbs(rsp, rnp, rdp); + needwake = rcu_accelerate_cbs(rsp, rnp, rdp); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + if (needwake) + rcu_gp_kthread_wake(rsp); } #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } @@ -2230,12 +2233,15 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) unsigned long c; bool d; unsigned long flags; + bool needwake; struct rcu_node *rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - c = rcu_start_future_gp(rnp, rdp); + needwake = rcu_start_future_gp(rnp, rdp, &c); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (needwake) + rcu_gp_kthread_wake(rdp->rsp); /* * Wait for the grace period. Do so interruptibly to avoid messing -- cgit v1.2.3-70-g09d2 From 7941dbdebe2a1fda31aefa033794510f95720a5a Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Mon, 17 Mar 2014 18:33:28 +0200 Subject: rcu: Add event tracing to dyntick_save_progress_counter(). This patch adds event tracing to dyntick_save_progress_counter() in the case where it returns 1. I used the tracepoint string "dti" because this function returns 1 in case the CPU is in dynticks idle mode. Signed-off-by: Andreea-Cristina Bernat Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fca911b6b29..c6fd0f15425 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -772,7 +772,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, { rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); rcu_sysidle_check_cpu(rdp, isidle, maxj); - return (rdp->dynticks_snap & 0x1) == 0; + if ((rdp->dynticks_snap & 0x1) == 0) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); + return 1; + } else { + return 0; + } } /* -- cgit v1.2.3-70-g09d2 From 595f3900f6b4221403493c530138b8dad2bd87f3 Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Tue, 18 Mar 2014 22:52:26 +0530 Subject: rcu: Replace NR_CPUS with nr_cpu_ids This patch replaces NR_CPUS with nr_cpu_ids as NR_CPUS should consider cpumask_var_t. Signed-off-by: Himangi Saraogi Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c6fd0f15425..6724bd98da7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3461,8 +3461,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; rnp->grphi = (j + 1) * cpustride - 1; - if (rnp->grphi >= NR_CPUS) - rnp->grphi = NR_CPUS - 1; + if (rnp->grphi >= nr_cpu_ids) + rnp->grphi = nr_cpu_ids - 1; if (i == 0) { rnp->grpnum = 0; rnp->grpmask = 0; -- cgit v1.2.3-70-g09d2 From 495aa969dbaef2e3d28094a2b3c752d069932748 Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Tue, 18 Mar 2014 20:48:48 +0200 Subject: rcu: Consolidate kfree_call_rcu() to use rcu_state pointer kfree_call_rcu is defined two times. When defined under CONFIG_TREE_PREEMPT_RCU, it uses rcu_preempt_state. Otherwise, it uses rcu_sched_state. This patch uses the rcu_state_pointer to combine the two definitions into one. The resulting function is placed after the closing of the preprocessor conditional CONFIG_TREE_PREEMPT_RCU. Signed-off-by: Andreea-Cristina Bernat Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 14 ++++++++++++++ kernel/rcu/tree_plugin.h | 30 ------------------------------ 2 files changed, 14 insertions(+), 30 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6724bd98da7..f3317c18b35 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2597,6 +2597,20 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); +/* + * Queue an RCU callback for lazy invocation after a grace period. + * This will likely be later named something like "call_rcu_lazy()", + * but this change will require some way of tagging the lazy RCU + * callbacks in the list of pending callbacks. Until then, this + * function may only be called from __kfree_rcu(). + */ +void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, rcu_state, -1, 1); +} +EXPORT_SYMBOL_GPL(kfree_call_rcu); + /* * Because a context switch is a grace period for RCU-sched and RCU-bh, * any blocking grace-period wait automatically implies a grace period diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0cb0816036c..4918a1243ef 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -688,20 +688,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); -/* - * Queue an RCU callback for lazy invocation after a grace period. - * This will likely be later named something like "call_rcu_lazy()", - * but this change will require some way of tagging the lazy RCU - * callbacks in the list of pending callbacks. Until then, this - * function may only be called from __kfree_rcu(). - */ -void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - __call_rcu(head, func, &rcu_preempt_state, -1, 1); -} -EXPORT_SYMBOL_GPL(kfree_call_rcu); - /** * synchronize_rcu - wait until a grace period has elapsed. * @@ -1079,22 +1065,6 @@ static void rcu_preempt_check_callbacks(int cpu) { } -/* - * Queue an RCU callback for lazy invocation after a grace period. - * This will likely be later named something like "call_rcu_lazy()", - * but this change will require some way of tagging the lazy RCU - * callbacks in the list of pending callbacks. Until then, this - * function may only be called from __kfree_rcu(). - * - * Because there is no preemptible RCU, we use RCU-sched instead. - */ -void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - __call_rcu(head, func, &rcu_sched_state, -1, 1); -} -EXPORT_SYMBOL_GPL(kfree_call_rcu); - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. -- cgit v1.2.3-70-g09d2 From a381d757d93f6e43063f74888676edd6216d0aff Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Wed, 19 Mar 2014 11:18:31 +0200 Subject: rcu: Merge rcu_sched_force_quiescent_state() with rcu_force_quiescent_state() This patch merges the function rcu_force_quiescent_state() with rcu_sched_force_quiescent_state(), using the rcu_state pointer. Firstly, the rcu_sched_force_quiescent_state() function is deleted from the file kernel/rcu/tree.c. Also, the rcu_force_quiescent_state() function that was calling force_quiescent_state with the argument rcu_preempt_state pointer was deleted as well. The new function that combines the old ones uses the rcu_state pointer and is located after rcu_batches_completed_bh() in kernel/rcu/tree.c. Signed-off-by: Andreea-Cristina Bernat Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 9 +++++++++ kernel/rcu/tree_plugin.h | 19 ------------------- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f3317c18b35..dbf43f5c86f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -270,6 +270,15 @@ long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Force a quiescent state. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(rcu_state); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Force a quiescent state for RCU BH. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 4918a1243ef..1af19e00689 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -148,15 +148,6 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); -/* - * Force a quiescent state for preemptible RCU. - */ -void rcu_force_quiescent_state(void) -{ - force_quiescent_state(&rcu_preempt_state); -} -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); - /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -976,16 +967,6 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); -/* - * Force a quiescent state for RCU, which, because there is no preemptible - * RCU, becomes the same as rcu-sched. - */ -void rcu_force_quiescent_state(void) -{ - rcu_sched_force_quiescent_state(); -} -EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); - /* * Because preemptible RCU does not exist, we never have to check for * CPUs being in quiescent states. -- cgit v1.2.3-70-g09d2 From 8c96ae1dfa1608b92ddbf8bb285d7269832e4ac0 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 14 Apr 2014 10:25:43 -0700 Subject: rcu: Remove duplicate resched_cpu() declaration Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index dbf43f5c86f..ecd7e046de7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -975,12 +975,6 @@ static void print_other_cpu_stall(struct rcu_state *rsp) force_quiescent_state(rsp); /* Kick them all. */ } -/* - * This function really isn't for public consumption, but RCU is special in - * that context switches can allow the state machine to make progress. - */ -extern void resched_cpu(int cpu); - static void print_cpu_stall(struct rcu_state *rsp) { int cpu; -- cgit v1.2.3-70-g09d2 From fa07a58f71ee23a82597ce337126982d0cc60b72 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 15 Apr 2014 12:20:12 -0700 Subject: rcu: Replace __this_cpu_ptr() uses with raw_cpu_ptr() __this_cpu_ptr is being phased out. One special case is increment_cpu_stall_ticks(). A per cpu variable is incremented so use raw_cpu_inc(). Cc: Dipankar Sarma Signed-off-by: Christoph Lameter Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcu/tree.c | 6 +++--- kernel/rcu/tree_plugin.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ecd7e046de7..d3e023e24c6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2008,7 +2008,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { int i; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) @@ -2392,7 +2392,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; bool needwake; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -3066,7 +3066,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) static void rcu_barrier_func(void *type) { struct rcu_state *rsp = type; - struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f7593c2536b..2e579c38bd9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1809,7 +1809,7 @@ static void rcu_oom_notify_cpu(void *unused) struct rcu_data *rdp; for_each_rcu_flavor(rsp) { - rdp = __this_cpu_ptr(rsp->rda); + rdp = raw_cpu_ptr(rsp->rda); if (rdp->qlen_lazy != 0) { atomic_inc(&oom_callback_count); rsp->call(&rdp->oom_head, rcu_oom_callback); @@ -1951,7 +1951,7 @@ static void increment_cpu_stall_ticks(void) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - __this_cpu_ptr(rsp->rda)->ticks_this_gp++; + raw_cpu_inc(rsp->rda->ticks_this_gp); } #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ -- cgit v1.2.3-70-g09d2 From ad0dc7f94dbf417b1c7d42e1f0b250f045b27f8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Feb 2014 10:51:42 -0800 Subject: rcutorture: Add forward-progress checking for writer The rcutorture output currently does not distinguish between stalls in the RCU implementation and stalls in the rcu_torture_writer() kthreads. This commit therefore adds some diagnostics to help distinguish between these two conditions, at least for the non-SRCU implementations. (SRCU does not provide evidence of update-side forward progress by design.) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 19 +++++++++++++++++++ kernel/rcu/rcutorture.c | 37 +++++++++++++++++++++++++++++++++++++ kernel/rcu/tree.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 00a7fd61b3c..82973738125 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,7 +51,17 @@ extern int rcu_expedited; /* for sysctl */ extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +enum rcutorture_type { + RCU_FLAVOR, + RCU_BH_FLAVOR, + RCU_SCHED_FLAVOR, + SRCU_FLAVOR, + INVALID_RCU_FLAVOR +}; + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) +void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, + unsigned long *gpnum, unsigned long *completed); void rcutorture_record_test_transition(void); void rcutorture_record_progress(unsigned long vernum); void do_trace_rcu_torture_read(const char *rcutorturename, @@ -60,6 +70,15 @@ void do_trace_rcu_torture_read(const char *rcutorturename, unsigned long c_old, unsigned long c); #else +static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, + int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + *flags = 0; + *gpnum = 0; + *completed = 0; +} static inline void rcutorture_record_test_transition(void) { } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bd30bc61bc0..0d739e3797e 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -138,6 +138,15 @@ static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; +static int rcu_torture_writer_state; +#define RTWS_FIXED_DELAY 0 +#define RTWS_DELAY 1 +#define RTWS_REPLACE 2 +#define RTWS_DEF_FREE 3 +#define RTWS_EXP_SYNC 4 +#define RTWS_STUTTER 5 +#define RTWS_STOPPING 6 + #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) #define RCUTORTURE_RUNNABLE_INIT 1 #else @@ -214,6 +223,7 @@ rcu_torture_free(struct rcu_torture *p) */ struct rcu_torture_ops { + int ttype; void (*init)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); @@ -312,6 +322,7 @@ static void rcu_sync_torture_init(void) } static struct rcu_torture_ops rcu_ops = { + .ttype = RCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, @@ -355,6 +366,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p) } static struct rcu_torture_ops rcu_bh_ops = { + .ttype = RCU_BH_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_bh_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -397,6 +409,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } static struct rcu_torture_ops rcu_busted_ops = { + .ttype = INVALID_RCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -492,6 +505,7 @@ static void srcu_torture_synchronize_expedited(void) } static struct rcu_torture_ops srcu_ops = { + .ttype = SRCU_FLAVOR, .init = rcu_sync_torture_init, .readlock = srcu_torture_read_lock, .read_delay = srcu_read_delay, @@ -527,6 +541,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p) } static struct rcu_torture_ops sched_ops = { + .ttype = RCU_SCHED_FLAVOR, .init = rcu_sync_torture_init, .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ @@ -699,12 +714,15 @@ rcu_torture_writer(void *arg) set_user_nice(current, MAX_NICE); do { + rcu_torture_writer_state = RTWS_FIXED_DELAY; schedule_timeout_uninterruptible(1); rp = rcu_torture_alloc(); if (rp == NULL) continue; rp->rtort_pipe_count = 0; + rcu_torture_writer_state = RTWS_DELAY; udelay(torture_random(&rand) & 0x3ff); + rcu_torture_writer_state = RTWS_REPLACE; old_rp = rcu_dereference_check(rcu_torture_current, current == writer_task); rp->rtort_mbtest = 1; @@ -721,8 +739,10 @@ rcu_torture_writer(void *arg) else exp = gp_exp; if (!exp) { + rcu_torture_writer_state = RTWS_DEF_FREE; cur_ops->deferred_free(old_rp); } else { + rcu_torture_writer_state = RTWS_EXP_SYNC; cur_ops->exp_sync(); list_add(&old_rp->rtort_free, &rcu_torture_removed); @@ -743,8 +763,10 @@ rcu_torture_writer(void *arg) } } rcutorture_record_progress(++rcu_torture_current_version); + rcu_torture_writer_state = RTWS_STUTTER; stutter_wait("rcu_torture_writer"); } while (!torture_must_stop()); + rcu_torture_writer_state = RTWS_STOPPING; torture_kthread_stopping("rcu_torture_writer"); return 0; } @@ -937,6 +959,7 @@ rcu_torture_printk(char *page) int i; long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; + static unsigned long rtcv_snap = ULONG_MAX; for_each_possible_cpu(cpu) { for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { @@ -997,6 +1020,20 @@ rcu_torture_printk(char *page) page += sprintf(page, "\n"); if (cur_ops->stats) cur_ops->stats(page); + if (rtcv_snap == rcu_torture_current_version && + rcu_torture_current != NULL) { + int __maybe_unused flags; + unsigned long __maybe_unused gpnum; + unsigned long __maybe_unused completed; + + rcutorture_get_gp_data(cur_ops->ttype, + &flags, &gpnum, &completed); + page += sprintf(page, + "??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); + } + rtcv_snap = rcu_torture_current_version; } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0c47e300210..3d15b5a82ae 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -293,6 +293,39 @@ void rcutorture_record_test_transition(void) } EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); +/* + * Send along grace-period-related data for rcutorture diagnostics. + */ +void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, + unsigned long *gpnum, unsigned long *completed) +{ + struct rcu_state *rsp = NULL; + + switch (test_type) { + case RCU_FLAVOR: + rsp = rcu_state; + break; + case RCU_BH_FLAVOR: + rsp = &rcu_bh_state; + break; + case RCU_SCHED_FLAVOR: + rsp = &rcu_sched_state; + break; + default: + break; + } + if (rsp != NULL) { + *flags = ACCESS_ONCE(rsp->gp_flags); + *gpnum = ACCESS_ONCE(rsp->gpnum); + *completed = ACCESS_ONCE(rsp->completed); + return; + } + *flags = 0; + *gpnum = 0; + *completed = 0; +} +EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); + /* * Record the number of writer passes through the current rcutorture test. * This is also used to correlate debugfs tracing stats with the rcutorture -- cgit v1.2.3-70-g09d2 From afea227fd4acf4f097a9e77bbc2f07d4856ebd01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Mar 2014 07:10:41 -0700 Subject: rcutorture: Export RCU grace-period kthread wait state to rcutorture This commit allows rcutorture to print additional state for the RCU grace-period kthreads in cases where RCU seems reluctant to start a new grace period. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcutiny.h | 4 ++++ include/linux/rcutree.h | 1 + kernel/rcu/rcutorture.c | 1 + kernel/rcu/tree.c | 17 +++++++++++++++++ kernel/rcu/tree.h | 8 +++++++- 5 files changed, 30 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 425c659d54e..d40a6a45133 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -119,6 +119,10 @@ static inline void rcu_sched_force_quiescent_state(void) { } +static inline void show_rcu_gp_kthreads(void) +{ +} + static inline void rcu_cpu_stall_reset(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index a59ca05fd4e..3e2f5d43274 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -84,6 +84,7 @@ extern unsigned long rcutorture_vernum; long rcu_batches_completed(void); long rcu_batches_completed_bh(void); long rcu_batches_completed_sched(void); +void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 9decce0f110..37ae5e1d4a1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1034,6 +1034,7 @@ rcu_torture_printk(char *page) "??? Writer stall state %d g%lu c%lu f%#x\n", rcu_torture_writer_state, gpnum, completed, flags); + show_rcu_gp_kthreads(); rcutorture_trace_dump(); } rtcv_snap = rcu_torture_current_version; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3d15b5a82ae..93e64381aa2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -279,6 +279,21 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); +/* + * Show the state of the grace-period kthreads. + */ +void show_rcu_gp_kthreads(void) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + pr_info("%s: wait state: %d ->state: %#lx\n", + rsp->name, rsp->gp_state, rsp->gp_kthread->state); + /* sched_show_task(rsp->gp_kthread); */ + } +} +EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); + /* * Record the number of times rcutorture tests have been initiated and * terminated. This information allows the debugfs tracing stats to be @@ -1626,6 +1641,7 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("reqwait")); + rsp->gp_state = RCU_GP_WAIT_GPS; wait_event_interruptible(rsp->gp_wq, ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); @@ -1653,6 +1669,7 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqswait")); + rsp->gp_state = RCU_GP_WAIT_FQS; ret = wait_event_interruptible_timeout(rsp->gp_wq, ((gf = ACCESS_ONCE(rsp->gp_flags)) & RCU_GP_FLAG_FQS) || diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 75dc3c39a02..c2fd1e72287 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -406,7 +406,8 @@ struct rcu_state { unsigned long completed; /* # of last completed gp. */ struct task_struct *gp_kthread; /* Task for grace periods. */ wait_queue_head_t gp_wq; /* Where GP task waits. */ - int gp_flags; /* Commands for GP task. */ + short gp_flags; /* Commands for GP task. */ + short gp_state; /* GP kthread sleep state. */ /* End of fields guarded by root rcu_node's lock. */ @@ -469,6 +470,11 @@ struct rcu_state { #define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ #define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ +/* Values for rcu_state structure's gp_flags field. */ +#define RCU_GP_WAIT_INIT 0 /* Initial state. */ +#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */ +#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */ + extern struct list_head rcu_struct_flavors; /* Sequence through rcu_state structures for each RCU flavor. */ -- cgit v1.2.3-70-g09d2 From e534165bbf6a04d001748c573c7d6a7bae3713a5 Mon Sep 17 00:00:00 2001 From: Uma Sharma Date: Sun, 23 Mar 2014 22:32:09 -0700 Subject: rcu: Variable name changed in tree_plugin.h and used in tree.c The variable and struct both having the name "rcu_state" confuses sparse in some situations, so this commit changes the variable to "rcu_state_p" in order to avoid this confusion. This also makes things easier for human readers. Signed-off-by: Uma Sharma [ paulmck: Changed the declaration and several additional uses. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 16 ++++++++-------- kernel/rcu/tree_plugin.h | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3bbe48939e4..3e3f13e8b42 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -101,7 +101,7 @@ DEFINE_PER_CPU(struct rcu_data, sname##_data) RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -static struct rcu_state *rcu_state; +static struct rcu_state *rcu_state_p; LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ @@ -275,7 +275,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); */ void rcu_force_quiescent_state(void) { - force_quiescent_state(rcu_state); + force_quiescent_state(rcu_state_p); } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); @@ -327,7 +327,7 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, switch (test_type) { case RCU_FLAVOR: - rsp = rcu_state; + rsp = rcu_state_p; break; case RCU_BH_FLAVOR: rsp = &rcu_bh_state; @@ -910,7 +910,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * we will beat on the first one until it gets unstuck, then move * to the next. Only do this for the primary flavor of RCU. */ - if (rdp->rsp == rcu_state && + if (rdp->rsp == rcu_state_p && ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { rdp->rsp->jiffies_resched += 5; resched_cpu(rdp->cpu); @@ -2660,7 +2660,7 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, rcu_state, -1, 1); + __call_rcu(head, func, rcu_state_p, -1, 1); } EXPORT_SYMBOL_GPL(kfree_call_rcu); @@ -2787,7 +2787,7 @@ unsigned long get_state_synchronize_rcu(void) * time-consuming work between get_state_synchronize_rcu() * and cond_synchronize_rcu(). */ - return smp_load_acquire(&rcu_state->gpnum); + return smp_load_acquire(&rcu_state_p->gpnum); } EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); @@ -2813,7 +2813,7 @@ void cond_synchronize_rcu(unsigned long oldstate) * Ensure that this load happens before any RCU-destructive * actions the caller might carry out after we return. */ - newstate = smp_load_acquire(&rcu_state->completed); + newstate = smp_load_acquire(&rcu_state_p->completed); if (ULONG_CMP_GE(oldstate, newstate)) synchronize_rcu(); } @@ -3354,7 +3354,7 @@ static int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); struct rcu_node *rnp = rdp->mynode; struct rcu_state *rsp; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2e579c38bd9..29977ae84e7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -116,7 +116,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state = &rcu_preempt_state; +static struct rcu_state *rcu_state_p = &rcu_preempt_state; static int rcu_preempted_readers_exp(struct rcu_node *rnp); @@ -947,7 +947,7 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static struct rcu_state *rcu_state = &rcu_sched_state; +static struct rcu_state *rcu_state_p = &rcu_sched_state; /* * Tell them what RCU they are running. @@ -1468,11 +1468,11 @@ static int __init rcu_spawn_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); - rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + rnp = rcu_get_root(rcu_state_p); + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + rcu_for_each_leaf_node(rcu_state_p, rnp) + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } return 0; } @@ -1480,12 +1480,12 @@ early_initcall(rcu_spawn_kthreads); static void rcu_prepare_kthreads(int cpu) { - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ if (rcu_scheduler_fully_active) - (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } #else /* #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3-70-g09d2 From 4a81e8328d3791a4f99bf5b436d050f6dc5ffea3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 20 Jun 2014 16:49:01 -0700 Subject: rcu: Reduce overhead of cond_resched() checks for RCU Commit ac1bea85781e (Make cond_resched() report RCU quiescent states) fixed a problem where a CPU looping in the kernel with but one runnable task would give RCU CPU stall warnings, even if the in-kernel loop contained cond_resched() calls. Unfortunately, in so doing, it introduced performance regressions in Anton Blanchard's will-it-scale "open1" test. The problem appears to be not so much the increased cond_resched() path length as an increase in the rate at which grace periods complete, which increased per-update grace-period overhead. This commit takes a different approach to fixing this bug, mainly by moving the RCU-visible quiescent state from cond_resched() to rcu_note_context_switch(), and by further reducing the check to a simple non-zero test of a single per-CPU variable. However, this approach requires that the force-quiescent-state processing send resched IPIs to the offending CPUs. These will be sent only once the grace period has reached an age specified by the boot/sysfs parameter rcutree.jiffies_till_sched_qs, or once the grace period reaches an age halfway to the point at which RCU CPU stall warnings will be emitted, whichever comes first. Reported-by: Dave Hansen Signed-off-by: Paul E. McKenney Cc: Andi Kleen Cc: Christoph Lameter Cc: Mike Galbraith Cc: Eric Dumazet Reviewed-by: Josh Triplett [ paulmck: Made rcu_momentary_dyntick_idle() as suggested by the ktest build robot. Also fixed smp_mb() comment as noted by Oleg Nesterov. ] Merge with e552592e (Reduce overhead of cond_resched() checks for RCU) Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 6 ++ include/linux/rcupdate.h | 36 ---------- kernel/rcu/tree.c | 140 ++++++++++++++++++++++++++++-------- kernel/rcu/tree.h | 6 +- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 18 ----- kernel/sched/core.c | 7 +- 7 files changed, 125 insertions(+), 90 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6eaa9cdb709..910c3829f81 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2785,6 +2785,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_sched_qs= [KNL] + Set required age in jiffies for a + given grace period before RCU starts + soliciting quiescent-state help from + rcu_note_context_switch(). + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13bbfbde41b..6a94cc8b1ca 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,7 +44,6 @@ #include #include #include -#include #include extern int rcu_expedited; /* for sysctl */ @@ -299,41 +298,6 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ -/* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -#define RCU_COND_RESCHED_LIM 256 /* ms vs. 100s of ms. */ -DECLARE_PER_CPU(int, rcu_cond_resched_count); -void rcu_resched(void); - -/* - * Is it time to report RCU quiescent states? - * - * Note unsynchronized access to rcu_cond_resched_count. Yes, we might - * increment some random CPU's count, and possibly also load the result from - * yet another CPU's count. We might even clobber some other CPU's attempt - * to zero its counter. This is all OK because the goal is not precision, - * but rather reasonable amortization of rcu_note_context_switch() overhead - * and extremely high probability of avoiding RCU CPU stall warnings. - * Note that this function has to be preempted in just the wrong place, - * many thousands of times in a row, for anything bad to happen. - */ -static inline bool rcu_should_resched(void) -{ - return raw_cpu_inc_return(rcu_cond_resched_count) >= - RCU_COND_RESCHED_LIM; -} - -/* - * Report quiscent states to RCU if it is time to do so. - */ -static inline void rcu_cond_resched(void) -{ - if (unlikely(rcu_should_resched())) - rcu_resched(); -} - /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f1ba77363fb..625d0b0cd75 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu) rdp->passed_quiesce = 1; } +static DEFINE_PER_CPU(int, rcu_sched_qs_mask); + +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { + .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks = ATOMIC_INIT(1), +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, + .dynticks_idle = ATOMIC_INIT(1), +#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +}; + +/* + * Let the RCU core know that this CPU has gone through the scheduler, + * which is a quiescent state. This is called when the need for a + * quiescent state is urgent, so we burn an atomic operation and full + * memory barriers to let the RCU core know about it, regardless of what + * this CPU might (or might not) do in the near future. + * + * We inform the RCU core by emulating a zero-duration dyntick-idle + * period, which we in turn do by incrementing the ->dynticks counter + * by two. + */ +static void rcu_momentary_dyntick_idle(void) +{ + unsigned long flags; + struct rcu_data *rdp; + struct rcu_dynticks *rdtp; + int resched_mask; + struct rcu_state *rsp; + + local_irq_save(flags); + + /* + * Yes, we can lose flag-setting operations. This is OK, because + * the flag will be set again after some delay. + */ + resched_mask = raw_cpu_read(rcu_sched_qs_mask); + raw_cpu_write(rcu_sched_qs_mask, 0); + + /* Find the flavor that needs a quiescent state. */ + for_each_rcu_flavor(rsp) { + rdp = raw_cpu_ptr(rsp->rda); + if (!(resched_mask & rsp->flavor_mask)) + continue; + smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ + if (ACCESS_ONCE(rdp->mynode->completed) != + ACCESS_ONCE(rdp->cond_resched_completed)) + continue; + + /* + * Pretend to be momentarily idle for the quiescent state. + * This allows the grace-period kthread to record the + * quiescent state, with no need for this CPU to do anything + * further. + */ + rdtp = this_cpu_ptr(&rcu_dynticks); + smp_mb__before_atomic(); /* Earlier stuff before QS. */ + atomic_add(2, &rdtp->dynticks); /* QS. */ + smp_mb__after_atomic(); /* Later stuff after QS. */ + break; + } + local_irq_restore(flags); +} + /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. @@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, - .dynticks = ATOMIC_INIT(1), -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, - .dynticks_idle = ATOMIC_INIT(1), -#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -}; - static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +/* + * How long the grace period must be before we start recruiting + * quiescent-state help from rcu_note_context_switch(). + */ +static ulong jiffies_till_sched_qs = HZ / 20; +module_param(jiffies_till_sched_qs, ulong, 0644); + static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, @@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned int curr; + int *rcrmp; unsigned int snap; curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); @@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, } /* - * There is a possibility that a CPU in adaptive-ticks state - * might run in the kernel with the scheduling-clock tick disabled - * for an extended time period. Invoke rcu_kick_nohz_cpu() to - * force the CPU to restart the scheduling-clock tick in this - * CPU is in this state. - */ - rcu_kick_nohz_cpu(rdp->cpu); - - /* - * Alternatively, the CPU might be running in the kernel - * for an extended period of time without a quiescent state. - * Attempt to force the CPU through the scheduler to gain the - * needed quiescent state, but only if the grace period has gone - * on for an uncommonly long time. If there are many stuck CPUs, - * we will beat on the first one until it gets unstuck, then move - * to the next. Only do this for the primary flavor of RCU. + * A CPU running for an extended time within the kernel can + * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, + * even context-switching back and forth between a pair of + * in-kernel CPU-bound tasks cannot advance grace periods. + * So if the grace period is old enough, make the CPU pay attention. + * Note that the unsynchronized assignments to the per-CPU + * rcu_sched_qs_mask variable are safe. Yes, setting of + * bits can be lost, but they will be set again on the next + * force-quiescent-state pass. So lost bit sets do not result + * in incorrect behavior, merely in a grace period lasting + * a few jiffies longer than it might otherwise. Because + * there are at most four threads involved, and because the + * updates are only once every few jiffies, the probability of + * lossage (and thus of slight grace-period extension) is + * quite low. + * + * Note that if the jiffies_till_sched_qs boot/sysfs parameter + * is set too high, we override with half of the RCU CPU stall + * warning delay. */ - if (rdp->rsp == rcu_state_p && + rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); + if (ULONG_CMP_GE(jiffies, + rdp->rsp->gp_start + jiffies_till_sched_qs) || ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { - rdp->rsp->jiffies_resched += 5; - resched_cpu(rdp->cpu); + if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { + ACCESS_ONCE(rdp->cond_resched_completed) = + ACCESS_ONCE(rdp->mynode->completed); + smp_mb(); /* ->cond_resched_completed before *rcrmp. */ + ACCESS_ONCE(*rcrmp) = + ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask; + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Enable beating. */ + } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { + /* Time to beat on that CPU again! */ + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ + } } return 0; @@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ + static u8 fl_mask = 0x1; int cpustride = 1; int i; int j; @@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); + rsp->flavor_mask = fl_mask; + fl_mask <<= 1; /* Initialize the elements themselves, starting from the leaves. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bf2c1e66969..0f69a79c5b7 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -307,6 +307,9 @@ struct rcu_data { /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long cond_resched_completed; + /* Grace period that needs help */ + /* from cond_resched(). */ /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ @@ -392,6 +395,7 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ + u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*func)(struct rcu_head *head)); @@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); -static void rcu_kick_nohz_cpu(int cpu); +static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index cbc2c45265e..02ac0fb186b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) * if an adaptive-ticks CPU is failing to respond to the current grace * period and has not be idle from an RCU perspective, kick it. */ -static void rcu_kick_nohz_cpu(int cpu) +static void __maybe_unused rcu_kick_nohz_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_cpu(cpu)) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 0fb691e63ce..bc788357053 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ - -/* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -DEFINE_PER_CPU(int, rcu_cond_resched_count); - -/* - * Report a set of RCU quiescent states, for use by cond_resched() - * and friends. Out of line due to being called infrequently. - */ -void rcu_resched(void) -{ - preempt_disable(); - __this_cpu_write(rcu_cond_resched_count, 0); - rcu_note_context_switch(smp_processor_id()); - preempt_enable(); -} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494f..bc1638b3344 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4147,7 +4147,6 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - rcu_cond_resched(); if (should_resched()) { __cond_resched(); return 1; @@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - bool need_rcu_resched = rcu_should_resched(); int resched = should_resched(); int ret = 0; lockdep_assert_held(lock); - if (spin_needbreak(lock) || resched || need_rcu_resched) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) __cond_resched(); - else if (unlikely(need_rcu_resched)) - rcu_resched(); else cpu_relax(); ret = 1; @@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - rcu_cond_resched(); /* BH disabled OK, just recording QSes. */ if (should_resched()) { local_bh_enable(); __cond_resched(); -- cgit v1.2.3-70-g09d2