diff options
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 184 |
1 files changed, 157 insertions, 27 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index febd0f72a3c..48d640ca1a0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); +/* rcuc/rcub kthread realtime priority */ +static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +module_param(kthread_prio, int, 0644); + /* * Track the rcutorture test sequence number and the update version * number within a given test. The rcutorture_testseq is incremented @@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; +DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); +EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); + /* * Let the RCU core know that this CPU has gone through the scheduler, * which is a quiescent state. This is called when the need for a @@ -284,6 +291,22 @@ void rcu_note_context_switch(void) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); +/* + * Register a quiesecent state for all RCU flavors. If there is an + * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight + * dyntick-idle quiescent state visible to other CPUs (but only for those + * RCU flavors in desparate need of a quiescent state, which will normally + * be none of them). Either way, do a lightweight quiescent state for + * all RCU flavors. + */ +void rcu_all_qs(void) +{ + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); + this_cpu_inc(rcu_qs_ctr); +} +EXPORT_SYMBOL_GPL(rcu_all_qs); + static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(void); /* - * Return the number of RCU-sched batches processed thus far for debug & stats. + * Return the number of RCU batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started(void) +{ + return rcu_state_p->gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started); + +/* + * Return the number of RCU-sched batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started_sched(void) +{ + return rcu_sched_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_sched); + +/* + * Return the number of RCU BH batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started_bh(void) +{ + return rcu_bh_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_bh); + +/* + * Return the number of RCU batches completed thus far for debug & stats. + */ +unsigned long rcu_batches_completed(void) +{ + return rcu_state_p->completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU-sched batches completed thus far for debug & stats. */ -long rcu_batches_completed_sched(void) +unsigned long rcu_batches_completed_sched(void) { return rcu_sched_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); /* - * Return the number of RCU BH batches processed thus far for debug & stats. + * Return the number of RCU BH batches completed thus far for debug & stats. */ -long rcu_batches_completed_bh(void) +unsigned long rcu_batches_completed_bh(void) { return rcu_bh_state.completed; } @@ -930,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); return 1; } else { + if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, + rdp->mynode->gpnum)) + ACCESS_ONCE(rdp->gpwrap) = true; return 0; } } /* - * This function really isn't for public consumption, but RCU is special in - * that context switches can allow the state machine to make progress. - */ -extern void resched_cpu(int cpu); - -/* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() @@ -1043,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) j1 = rcu_jiffies_till_stall_check(); ACCESS_ONCE(rsp->jiffies_stall) = j + j1; rsp->jiffies_resched = j + j1 / 2; + rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); +} + +/* + * Complain about starvation of grace-period kthread. + */ +static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) +{ + unsigned long gpa; + unsigned long j; + + j = jiffies; + gpa = ACCESS_ONCE(rsp->gp_activity); + if (j - gpa > 2 * HZ) + pr_err("%s kthread starved for %ld jiffies!\n", + rsp->name, j - gpa); } /* @@ -1065,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) } } -static void print_other_cpu_stall(struct rcu_state *rsp) +static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) { int cpu; long delta; unsigned long flags; + unsigned long gpa; + unsigned long j; int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; @@ -1113,15 +1187,28 @@ static void print_other_cpu_stall(struct rcu_state *rsp) pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), (long)rsp->gpnum, (long)rsp->completed, totqlen); - if (ndetected == 0) - pr_err("INFO: Stall ended before state dump start\n"); - else + if (ndetected) { rcu_dump_cpu_stacks(rsp); + } else { + if (ACCESS_ONCE(rsp->gpnum) != gpnum || + ACCESS_ONCE(rsp->completed) == gpnum) { + pr_err("INFO: Stall ended before state dump start\n"); + } else { + j = jiffies; + gpa = ACCESS_ONCE(rsp->gp_activity); + pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld\n", + rsp->name, j - gpa, j, gpa, + jiffies_till_next_fqs); + /* In this case, the current CPU might be at fault. */ + sched_show_task(current); + } + } /* Complain about tasks blocking the grace period. */ - rcu_print_detail_task_stall(rsp); + rcu_check_gp_kthread_starvation(rsp); + force_quiescent_state(rsp); /* Kick them all. */ } @@ -1146,6 +1233,9 @@ static void print_cpu_stall(struct rcu_state *rsp) pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", jiffies - rsp->gp_start, (long)rsp->gpnum, (long)rsp->completed, totqlen); + + rcu_check_gp_kthread_starvation(rsp); + rcu_dump_cpu_stacks(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); @@ -1216,7 +1306,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { /* They had a few time units to dump stack, so complain. */ - print_other_cpu_stall(rsp); + print_other_cpu_stall(rsp, gpnum); } } @@ -1553,7 +1643,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, bool ret; /* Handle the ends of any preceding grace periods first. */ - if (rdp->completed == rnp->completed) { + if (rdp->completed == rnp->completed && + !unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* No grace period end, so just accelerate recent callbacks. */ ret = rcu_accelerate_cbs(rsp, rnp, rdp); @@ -1568,7 +1659,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); } - if (rdp->gpnum != rnp->gpnum) { + if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* * If the current grace period is waiting for this CPU, * set up to detect a quiescent state, otherwise don't @@ -1577,8 +1668,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); rdp->passed_quiesce = 0; + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); + ACCESS_ONCE(rdp->gpwrap) = false; } return ret; } @@ -1592,7 +1685,8 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_save(flags); rnp = rdp->mynode; if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && - rdp->completed == ACCESS_ONCE(rnp->completed)) || /* w/out lock. */ + rdp->completed == ACCESS_ONCE(rnp->completed) && + !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */ !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ local_irq_restore(flags); return; @@ -1612,6 +1706,7 @@ static int rcu_gp_init(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); @@ -1672,6 +1767,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } mutex_unlock(&rsp->onoff_mutex); @@ -1688,6 +1784,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) unsigned long maxj; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; rsp->n_force_qs++; if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ @@ -1726,6 +1823,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); + ACCESS_ONCE(rsp->gp_activity) = jiffies; raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); gp_duration = jiffies - rsp->gp_start; @@ -1762,6 +1860,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1811,6 +1910,7 @@ static int __noreturn rcu_gp_kthread(void *arg) if (rcu_gp_init(rsp)) break; cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1854,9 +1954,11 @@ static int __noreturn rcu_gp_kthread(void *arg) ACCESS_ONCE(rsp->gpnum), TPS("fqsend")); cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; } else { /* Deal with stray signal. */ cond_resched_rcu_qs(); + ACCESS_ONCE(rsp->gp_activity) = jiffies; WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -2033,8 +2135,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || - rnp->completed == rnp->gpnum) { + if ((rdp->passed_quiesce == 0 && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || + rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || + rdp->gpwrap) { /* * The grace period in which this quiescent state was @@ -2043,6 +2147,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * within the current grace period. */ rdp->passed_quiesce = 0; /* need qs for new gp. */ + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -2087,7 +2192,8 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Was there a quiescent state since the beginning of the grace * period? If no, then exit and wait for the next call. */ - if (!rdp->passed_quiesce) + if (!rdp->passed_quiesce && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) return; /* @@ -2603,7 +2709,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) * Schedule RCU callback invocation. If the specified type of RCU * does not support RCU priority boosting, just do a direct call, * otherwise wake up the per-CPU kernel kthread. Note that because we - * are running on the current CPU with interrupts disabled, the + * are running on the current CPU with softirqs disabled, the * rcu_cpu_kthread_task cannot disappear out from under us. */ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) @@ -3143,9 +3249,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && - rdp->qs_pending && !rdp->passed_quiesce) { + rdp->qs_pending && !rdp->passed_quiesce && + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { rdp->n_rp_qs_pending++; - } else if (rdp->qs_pending && rdp->passed_quiesce) { + } else if (rdp->qs_pending && + (rdp->passed_quiesce || + rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { rdp->n_rp_report_qs++; return 1; } @@ -3169,7 +3278,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ + if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum || + unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */ rdp->n_rp_gp_started++; return 1; } @@ -3352,6 +3462,7 @@ static void _rcu_barrier(struct rcu_state *rsp) } else { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); + smp_mb__before_atomic(); atomic_inc(&rsp->barrier_cpu_count); __call_rcu(&rdp->barrier_head, rcu_barrier_callback, rsp, cpu, 0); @@ -3475,6 +3586,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; rdp->completed = rnp->completed; rdp->passed_quiesce = 0; + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->qs_pending = 0; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); } @@ -3566,17 +3678,35 @@ static int rcu_pm_notify(struct notifier_block *self, static int __init rcu_spawn_gp_kthread(void) { unsigned long flags; + int kthread_prio_in = kthread_prio; struct rcu_node *rnp; struct rcu_state *rsp; + struct sched_param sp; struct task_struct *t; + /* Force priority into range. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) + kthread_prio = 1; + else if (kthread_prio < 0) + kthread_prio = 0; + else if (kthread_prio > 99) + kthread_prio = 99; + if (kthread_prio != kthread_prio_in) + pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", + kthread_prio, kthread_prio_in); + rcu_scheduler_fully_active = 1; for_each_rcu_flavor(rsp) { - t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); + t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); rsp->gp_kthread = t; + if (kthread_prio) { + sp.sched_priority = kthread_prio; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + } + wake_up_process(t); raw_spin_unlock_irqrestore(&rnp->lock, flags); } rcu_spawn_nocb_kthreads(); |