From 24278d148316d2180be6df40e06db013d8b232b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2010 17:25:23 -0700 Subject: rcu: priority boosting for TINY_PREEMPT_RCU Add priority boosting, but only for TINY_PREEMPT_RCU. This is enabled by the default-off RCU_BOOST kernel parameter. The priority to which to boost preempted RCU readers is controlled by the RCU_BOOST_PRIO kernel parameter (defaulting to real-time priority 1) and the time to wait before boosting the readers blocking a given grace period is controlled by the RCU_BOOST_DELAY kernel parameter (defaulting to 500 milliseconds). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 66 ++++++--------- kernel/rcutiny_plugin.h | 208 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 224 insertions(+), 50 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 86eef29cdfb..93d166582cb 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -36,38 +36,16 @@ #include #include -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ -}; - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */ -static struct task_struct *rcu_cbs_task; -static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq); -static unsigned long have_rcu_cbs; -static void invoke_rcu_cbs(void); +/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ +static struct task_struct *rcu_kthread_task; +static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); +static unsigned long have_rcu_kthread_work; +static void invoke_rcu_kthread(void); /* Forward declarations for rcutiny_plugin.h. */ +struct rcu_ctrlblk; static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); -static int rcu_cbs(void *arg); +static int rcu_kthread(void *arg); static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); @@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu) { if (rcu_qsctr_help(&rcu_sched_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu) void rcu_bh_qs(int cpu) { if (rcu_qsctr_help(&rcu_bh_ctrlblk)) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) * This is a kthread, but it is never stopped, at least not until * the system goes down. */ -static int rcu_cbs(void *arg) +static int rcu_kthread(void *arg) { unsigned long work; + unsigned long morework; unsigned long flags; for (;;) { - wait_event(rcu_cbs_wq, have_rcu_cbs != 0); + wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); + morework = rcu_boost(); local_irq_save(flags); - work = have_rcu_cbs; - have_rcu_cbs = 0; + work = have_rcu_kthread_work; + have_rcu_kthread_work = morework; local_irq_restore(flags); if (work) { rcu_process_callbacks(&rcu_sched_ctrlblk); rcu_process_callbacks(&rcu_bh_ctrlblk); rcu_preempt_process_callbacks(); } + schedule_timeout_interruptible(1); /* Leave CPU for others. */ } return 0; /* Not reached, but needed to shut gcc up. */ } /* - * Wake up rcu_cbs() to process callbacks now eligible for invocation. + * Wake up rcu_kthread() to process callbacks now eligible for invocation + * or to boost readers. */ -static void invoke_rcu_cbs(void) +static void invoke_rcu_kthread(void) { unsigned long flags; local_irq_save(flags); - have_rcu_cbs = 1; - wake_up(&rcu_cbs_wq); + have_rcu_kthread_work = 1; + wake_up(&rcu_kthread_wq); local_irq_restore(flags); } @@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched); */ static int __init rcu_spawn_kthreads(void) { - rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs"); + struct sched_param sp; + + rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); + sp.sched_priority = RCU_BOOST_PRIO; + sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); return 0; } early_initcall(rcu_spawn_kthreads); diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 95f9239df51..24f43165f22 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -24,6 +24,29 @@ #include +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_TINY_PREEMPT_RCU #include @@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk { struct list_head *gp_tasks; /* Pointer to the first task blocking the */ /* current grace period, or NULL if there */ - /* is not such task. */ + /* is no such task. */ struct list_head *exp_tasks; /* Pointer to first task blocking the */ /* current expedited grace period, or NULL */ /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there cannot be any such task. */ +#ifdef CONFIG_RCU_BOOST + struct list_head *boost_tasks; + /* Pointer to first task that needs to be */ + /* priority-boosted, or NULL if no priority */ + /* boosting is needed. If there is no */ + /* current or expedited grace period, there */ + /* can be no such task. */ +#endif /* #ifdef CONFIG_RCU_BOOST */ u8 gpnum; /* Current grace period. */ u8 gpcpu; /* Last grace period blocked by the CPU. */ u8 completed; /* Last grace period completed. */ /* If all three are equal, RCU is idle. */ + s8 boosted_this_gp; /* Has boosting already happened? */ + unsigned long boost_time; /* When to start boosting (jiffies) */ }; static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { @@ -123,6 +156,130 @@ static int rcu_preempt_gp_in_progress(void) return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; } +/* + * Advance a ->blkd_tasks-list pointer to the next entry, instead + * returning NULL if at the end of the list. + */ +static struct list_head *rcu_next_node_entry(struct task_struct *t) +{ + struct list_head *np; + + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + return np; +} + +#ifdef CONFIG_RCU_BOOST + +#include "rtmutex_common.h" + +/* + * Carry out RCU priority boosting on the task indicated by ->boost_tasks, + * and advance ->boost_tasks to the next task in the ->blkd_tasks list. + */ +static int rcu_boost(void) +{ + unsigned long flags; + struct rt_mutex mtx; + struct list_head *np; + struct task_struct *t; + + if (rcu_preempt_ctrlblk.boost_tasks == NULL) + return 0; /* Nothing to boost. */ + raw_local_irq_save(flags); + rcu_preempt_ctrlblk.boosted_this_gp++; + t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, + rcu_node_entry); + np = rcu_next_node_entry(t); + rt_mutex_init_proxy_locked(&mtx, t); + t->rcu_boost_mutex = &mtx; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; + raw_local_irq_restore(flags); + rt_mutex_lock(&mtx); + rt_mutex_unlock(&mtx); + return rcu_preempt_ctrlblk.boost_tasks != NULL; +} + +/* + * Check to see if it is now time to start boosting RCU readers blocking + * the current grace period, and, if so, tell the rcu_kthread_task to + * start boosting them. If there is an expedited boost in progress, + * we wait for it to complete. + */ +static void rcu_initiate_boost(void) +{ + if (rcu_preempt_ctrlblk.gp_tasks != NULL && + rcu_preempt_ctrlblk.boost_tasks == NULL && + rcu_preempt_ctrlblk.boosted_this_gp == 0 && + ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { + rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; + invoke_rcu_kthread(); + } +} + +/* + * Initiate boosting for an expedited grace period. + */ +static void rcu_initiate_expedited_boost(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { + rcu_preempt_ctrlblk.boost_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + rcu_preempt_ctrlblk.boosted_this_gp = -1; + invoke_rcu_kthread(); + } + raw_local_irq_restore(flags); +} + +#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000); + +/* + * Do priority-boost accounting for the start of a new grace period. + */ +static void rcu_preempt_boost_start_gp(void) +{ + rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; + if (rcu_preempt_ctrlblk.boosted_this_gp > 0) + rcu_preempt_ctrlblk.boosted_this_gp = 0; +} + +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * If there is no RCU priority boosting, we don't boost. + */ +static int rcu_boost(void) +{ + return 0; +} + +/* + * If there is no RCU priority boosting, we don't initiate boosting. + */ +static void rcu_initiate_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, we don't initiate expedited boosting. + */ +static void rcu_initiate_expedited_boost(void) +{ +} + +/* + * If there is no RCU priority boosting, nothing to do at grace-period start. + */ +static void rcu_preempt_boost_start_gp(void) +{ +} + +#endif /* else #ifdef CONFIG_RCU_BOOST */ + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void) rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; - /* - * If there is no GP, or if blocked readers are still blocking GP, - * then there is nothing more to do. - */ + /* If there is no GP then there is nothing more to do. */ if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) return; + /* If there are blocked readers, go check up on boosting. */ + if (rcu_preempt_blocked_readers_cgp()) { + rcu_initiate_boost(); + return; + } /* Advance callbacks. */ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; @@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void) /* If there are done callbacks, cause them to be invoked. */ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) - invoke_rcu_cbs(); + invoke_rcu_kthread(); } /* @@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void) rcu_preempt_ctrlblk.gp_tasks = rcu_preempt_ctrlblk.blkd_tasks.next; + /* Set up for RCU priority boosting. */ + rcu_preempt_boost_start_gp(); + /* If there is no running reader, CPU is done with GP. */ if (!rcu_preempt_running_reader()) rcu_preempt_cpu_qs(); @@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t) */ empty = !rcu_preempt_blocked_readers_cgp(); empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; - np = t->rcu_node_entry.next; - if (np == &rcu_preempt_ctrlblk.blkd_tasks) - np = NULL; + np = rcu_next_node_entry(t); list_del(&t->rcu_node_entry); if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) rcu_preempt_ctrlblk.gp_tasks = np; if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) rcu_preempt_ctrlblk.exp_tasks = np; +#ifdef CONFIG_RCU_BOOST + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) + rcu_preempt_ctrlblk.boost_tasks = np; +#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&t->rcu_node_entry); /* @@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t) if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) rcu_report_exp_done(); } +#ifdef CONFIG_RCU_BOOST + /* Unboost self if was boosted. */ + if (special & RCU_READ_UNLOCK_BOOSTED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; + rt_mutex_unlock(t->rcu_boost_mutex); + t->rcu_boost_mutex = NULL; + } +#endif /* #ifdef CONFIG_RCU_BOOST */ local_irq_restore(flags); } @@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void) rcu_preempt_cpu_qs(); if (&rcu_preempt_ctrlblk.rcb.rcucblist != rcu_preempt_ctrlblk.rcb.donetail) - invoke_rcu_cbs(); + invoke_rcu_kthread(); if (rcu_preempt_gp_in_progress() && rcu_cpu_blocking_cur_gp() && rcu_preempt_running_reader()) @@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void) /* Wait for tail of ->blkd_tasks list to drain. */ if (rcu_preempted_readers_exp()) + rcu_initiate_expedited_boost(); wait_event(sync_rcu_preempt_exp_wq, !rcu_preempted_readers_exp()); @@ -574,6 +747,15 @@ void exit_rcu(void) #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +/* + * Because preemptible RCU does not exist, it is never necessary to + * boost preempted RCU readers. + */ +static int rcu_boost(void) +{ + return 0; +} + /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void) } #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else /* #ifdef CONFIG_RCU_BOOST */ +#define RCU_BOOST_PRIO 1 +#endif /* #else #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3-70-g09d2