From f885b7f2b2de70be266d2cecc476f773a1e2ca5d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Apr 2012 15:52:53 -0700 Subject: rcu: Control RCU_FANOUT_LEAF from boot-time parameter Although making RCU_FANOUT_LEAF a kernel configuration parameter rather than a fixed constant makes it easier for people to decrease cache-miss overhead for large systems, it is of little help for people who must run a single pre-built kernel binary. This commit therefore allows the value of RCU_FANOUT_LEAF to be increased (but not decreased!) via a boot-time parameter named rcutree.rcu_fanout_leaf. Reported-by: Mike Galbraith Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 14 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..a4c592b66e1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,17 +60,10 @@ /* Data structures. */ -static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; +static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; #define RCU_STATE_INITIALIZER(structname) { \ .level = { &structname##_state.node[0] }, \ - .levelcnt = { \ - NUM_RCU_LVL_0, /* root of hierarchy. */ \ - NUM_RCU_LVL_1, \ - NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, \ - NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ - }, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ @@ -91,6 +84,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +module_param(rcu_fanout_leaf, int, 0); +int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; +static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ + NUM_RCU_LVL_0, + NUM_RCU_LVL_1, + NUM_RCU_LVL_2, + NUM_RCU_LVL_3, + NUM_RCU_LVL_4, +}; +int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ + /* * The rcu_scheduler_active variable transitions from zero to one just * before the first task is spawned. So when this variable is zero, RCU @@ -2574,9 +2580,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - for (i = NUM_RCU_LVLS - 1; i > 0; i--) + for (i = rcu_num_lvls - 1; i > 0; i--) rsp->levelspread[i] = CONFIG_RCU_FANOUT; - rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; + rsp->levelspread[0] = rcu_fanout_leaf; } #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ static void __init rcu_init_levelspread(struct rcu_state *rsp) @@ -2586,7 +2592,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int i; cprv = NR_CPUS; - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; cprv = ccur; @@ -2613,13 +2619,15 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* Initialize the level-tracking arrays. */ - for (i = 1; i < NUM_RCU_LVLS; i++) + for (i = 0; i < rcu_num_lvls; i++) + rsp->levelcnt[i] = num_rcu_lvl[i]; + for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); /* Initialize the elements themselves, starting from the leaves. 
*/ - for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + for (i = rcu_num_lvls - 1; i >= 0; i--) { cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { @@ -2649,7 +2657,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; - rnp = rsp->level[NUM_RCU_LVLS - 1]; + rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) rnp++; @@ -2658,11 +2666,72 @@ static void __init rcu_init_one(struct rcu_state *rsp, } } +/* + * Compute the rcu_node tree geometry from kernel parameters. This cannot + * replace the definitions in rcutree.h because those are needed to size + * the ->node array in the rcu_state structure. + */ +static void __init rcu_init_geometry(void) +{ + int i; + int j; + int n = NR_CPUS; + int rcu_capacity[MAX_RCU_LVLS + 1]; + + /* If the compile-time values are accurate, just leave. */ + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + return; + + /* + * Compute number of nodes that can be handled by an rcu_node tree + * with the given number of levels. Setting rcu_capacity[0] makes + * some of the arithmetic easier. + */ + rcu_capacity[0] = 1; + rcu_capacity[1] = rcu_fanout_leaf; + for (i = 2; i <= MAX_RCU_LVLS; i++) + rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + + /* + * The boot-time rcu_fanout_leaf parameter is only permitted + * to increase the leaf-level fanout, not decrease it. Of course, + * the leaf-level fanout cannot exceed the number of bits in + * the rcu_node masks. Finally, the tree must be able to accommodate + * the configured number of CPUs. Complain and fall back to the + * compile-time values if these limits are exceeded. + */ + if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + rcu_fanout_leaf > sizeof(unsigned long) * 8 || + n > rcu_capacity[MAX_RCU_LVLS]) { + WARN_ON(1); + return; + } + + /* Calculate the number of rcu_nodes at each level of the tree. */ + for (i = 1; i <= MAX_RCU_LVLS; i++) + if (n <= rcu_capacity[i]) { + for (j = 0; j <= i; j++) + num_rcu_lvl[j] = + DIV_ROUND_UP(n, rcu_capacity[i - j]); + rcu_num_lvls = i; + for (j = i + 1; j <= MAX_RCU_LVLS; j++) + num_rcu_lvl[j] = 0; + break; + } + + /* Calculate the total number of rcu_node structures. */ + rcu_num_nodes = 0; + for (i = 0; i <= MAX_RCU_LVLS; i++) + rcu_num_nodes += num_rcu_lvl[i]; + rcu_num_nodes -= n; +} + void __init rcu_init(void) { int cpu; rcu_bootup_announce(); + rcu_init_geometry(); rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); -- cgit v1.2.3-70-g09d2 From cca6f3931920a7547d02e68adc2ca635bea5600c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 May 2012 21:00:28 -0700 Subject: rcu: Size rcu_node tree from nr_cpu_ids rather than NR_CPUS The rcu_node tree array is sized based on compile-time constants, including NR_CPUS. Although this approach has worked well in the past, the recent trend by many distros to define NR_CPUS=4096 results in excessive grace-period-initialization latencies. This commit therefore substitutes the run-time computed nr_cpu_ids for the compile-time NR_CPUS when building the tree. This can result in much of the compile-time-allocated rcu_node array being unused. If this is a major problem, you are in a specialized situation anyway, so you can manually adjust the NR_CPUS, RCU_FANOUT, and RCU_FANOUT_LEAF kernel config parameters. Signed-off-by: Paul E.
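McKenney

The geometry computation in rcu_init_geometry() above lends itself to a stand-alone illustration. The following user-space sketch (hypothetical fanout values, not part of the patch) shows why sizing the tree from the run-time CPU count matters: with RCU_FANOUT=64 and RCU_FANOUT_LEAF=16, NR_CPUS=4096 forces a three-level tree, while a 16-CPU machine needs only the single leaf level, and so far less grace-period-initialization work.

    /* Stand-alone sketch of the rcu_init_geometry() capacity logic. */
    #include <stdio.h>

    #define MAX_RCU_LVLS 4

    static int levels_for(int n, int fanout_leaf, int fanout)
    {
        unsigned long cap[MAX_RCU_LVLS + 1];
        int i;

        cap[0] = 1;                  /* Eases the arithmetic, as in the patch. */
        cap[1] = fanout_leaf;
        for (i = 2; i <= MAX_RCU_LVLS; i++)
            cap[i] = cap[i - 1] * fanout;
        for (i = 1; i <= MAX_RCU_LVLS; i++)
            if (n <= cap[i])
                return i;            /* Smallest tree that holds n CPUs. */
        return -1;                   /* Does not fit. */
    }

    int main(void)
    {
        printf("NR_CPUS=4096: %d levels\n", levels_for(4096, 16, 64));
        printf("nr_cpu_ids=16: %d levels\n", levels_for(16, 16, 64));
        return 0;
    }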
--- kernel/rcutree.c | 2 +- kernel/rcutree_plugin.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a4c592b66e1..0fdbc5e0730 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2675,7 +2675,7 @@ static void __init rcu_init_geometry(void) { int i; int j; - int n = NR_CPUS; + int n = nr_cpu_ids; int rcu_capacity[MAX_RCU_LVLS + 1]; /* If the compile-time values are accurate, just leave. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 70e0fd256cc..ef2b5231afa 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -72,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void) #endif if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); + if (nr_cpu_ids != NR_CPUS) + printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); } #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3-70-g09d2 From 6c90cc7bf077f28144013e949ee0c122012d194a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 19:41:41 -0700 Subject: rcu: Prevent excessive line length in RCU_STATE_INITIALIZER() Upcoming rcu_barrier() concurrency commits will result in line lengths greater than 80 characters in the RCU_STATE_INITIALIZER(), so this commit shortens the name of the macro's argument to prevent this. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0fdbc5e0730..dd7fd96c90c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -62,18 +62,18 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(structname) { \ - .level = { &structname##_state.node[0] }, \ +#define RCU_STATE_INITIALIZER(sname) { \ + .level = { &sname##_state.node[0] }, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ - .orphan_nxttail = &structname##_state.orphan_nxtlist, \ - .orphan_donetail = &structname##_state.orphan_donelist, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ + .orphan_nxttail = &sname##_state.orphan_nxtlist, \ + .orphan_donetail = &sname##_state.orphan_donelist, \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ - .name = #structname, \ + .name = #sname, \ } struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); -- cgit v1.2.3-70-g09d2 From 037b64ed0bf2405a1a01542164d3418564b44fff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:26:01 -0700 Subject: rcu: Place pointer to call_rcu() in rcu_data structure This is a preparatory commit for increasing rcu_barrier()'s concurrency. It adds a pointer in the rcu_data structure to the corresponding call_rcu() function. This allows a pointer to the rcu_data structure to imply the function pointer, which allows _rcu_barrier() state to be placed in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E.
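McKenney Reviewed-by: Josh Triplett

The pattern introduced here makes each flavor's state structure self-describing: any code holding an rcu_state pointer can post a callback of the correct flavor without being handed the call_rcu() variant separately. A minimal sketch of the idea (simplified declarations, not the kernel's actual types):

    /* Sketch: a per-flavor function pointer lets the state imply the API. */
    struct rcu_head;
    typedef void (*rcu_cb_t)(struct rcu_head *head);

    struct rcu_state_sketch {
        const char *name;
        void (*call)(struct rcu_head *head, rcu_cb_t func);
    };

    /* Callers no longer pass call_rcu_func alongside the state pointer. */
    static void post_cb(struct rcu_state_sketch *rsp,
                        struct rcu_head *head, rcu_cb_t func)
    {
        rsp->call(head, func);
    }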
--- kernel/rcutree.c | 27 ++++++++++++--------------- kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 5 +++-- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dd7fd96c90c..00c518fa34b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -62,8 +62,9 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname) { \ +#define RCU_STATE_INITIALIZER(sname, cr) { \ .level = { &sname##_state.node[0] }, \ + .call = cr, \ .fqs_state = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ @@ -76,10 +77,11 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .name = #sname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); +struct rcu_state rcu_sched_state = + RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; @@ -2282,21 +2284,17 @@ static void rcu_barrier_func(void *type) { int cpu = smp_processor_id(); struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + struct rcu_state *rsp = type; atomic_inc(&rcu_barrier_cpu_count); - call_rcu_func = type; - call_rcu_func(head, rcu_barrier_callback); + rsp->call(head, rcu_barrier_callback); } /* * Orchestrate the specified type of RCU barrier, waiting for all * RCU callbacks of the specified type to complete. */ -static void _rcu_barrier(struct rcu_state *rsp, - void (*call_rcu_func)(struct rcu_head *head, - void (*func)(struct rcu_head *head))) +static void _rcu_barrier(struct rcu_state *rsp) { int cpu; unsigned long flags; @@ -2348,8 +2346,7 @@ static void _rcu_barrier(struct rcu_state *rsp, while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { - smp_call_function_single(cpu, rcu_barrier_func, - (void *)call_rcu_func, 1); + smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { preempt_enable(); } @@ -2370,7 +2367,7 @@ static void _rcu_barrier(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rsp->onofflock, flags); atomic_inc(&rcu_barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - call_rcu_func(&rh, rcu_barrier_callback); + rsp->call(&rh, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each @@ -2393,7 +2390,7 @@ static void _rcu_barrier(struct rcu_state *rsp, */ void rcu_barrier_bh(void) { - _rcu_barrier(&rcu_bh_state, call_rcu_bh); + _rcu_barrier(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -2402,7 +2399,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_bh); */ void rcu_barrier_sched(void) { - _rcu_barrier(&rcu_sched_state, call_rcu_sched); + _rcu_barrier(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 780a0195d35..049896a835d 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -364,6 +364,8 @@ struct rcu_state { u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ + void (*call)(struct rcu_head *head, /* call_rcu() flavor.
*/ + void (*func)(struct rcu_head *head)); /* The following fields are guarded by the root rcu_node's lock. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ef2b5231afa..9cb3a68819f 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -78,7 +78,8 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); +struct rcu_state rcu_preempt_state = + RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -944,7 +945,7 @@ static int rcu_preempt_cpu_has_callbacks(int cpu) */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state, call_rcu); + _rcu_barrier(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_barrier); -- cgit v1.2.3-70-g09d2 From 06668efa9180f4824fe846a8ff96338c18646bc7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 May 2012 23:57:46 -0700 Subject: rcu: Move _rcu_barrier()'s rcu_head structures to rcu_data structures In order for multiple flavors of RCU to each concurrently run one rcu_barrier(), each flavor needs its own per-CPU set of rcu_head structures. This commit therefore moves _rcu_barrier()'s set of per-CPU rcu_head structures from per-CPU variables to the existing per-CPU and per-RCU-flavor rcu_data structures. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 6 ++---- kernel/rcutree.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 00c518fa34b..1e552598b55 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2282,12 +2281,11 @@ static void rcu_barrier_callback(struct rcu_head *notused) */ static void rcu_barrier_func(void *type) { - int cpu = smp_processor_id(); - struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); struct rcu_state *rsp = type; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); atomic_inc(&rcu_barrier_cpu_count); - rsp->call(head, rcu_barrier_callback); + rsp->call(&rdp->barrier_head, rcu_barrier_callback); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 049896a835d..586d93c978f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -314,6 +314,9 @@ struct rcu_data { unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; + /* 6) _rcu_barrier() callback. */ + struct rcu_head barrier_head; + int cpu; struct rcu_state *rsp; }; -- cgit v1.2.3-70-g09d2 From 24ebbca8ecdd5129d7f829a7cb5146aaeb531f77 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 00:34:56 -0700 Subject: rcu: Move rcu_barrier_cpu_count to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_cpu_count global variable to a new ->barrier_cpu_count field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
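McKenney Reviewed-by: Josh Triplett

The counting discipline behind ->barrier_cpu_count is worth spelling out. The count starts at one rather than zero so that it cannot reach zero while callbacks are still being posted; each posted rcu_barrier_callback() adds one, each invocation removes one, and the initial self-reference is dropped last. A user-space analogy using C11 atomics (an illustration, not the kernel's atomic_t API):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int barrier_cpu_count;

    static void barrier_begin(void)
    {
        atomic_store(&barrier_cpu_count, 1);   /* Initial self-reference. */
    }

    static void barrier_post_one(void)         /* One per CPU with callbacks. */
    {
        atomic_fetch_add(&barrier_cpu_count, 1);
    }

    static bool barrier_cb_invoked(void)       /* True if this was the last. */
    {
        return atomic_fetch_sub(&barrier_cpu_count, 1) == 1;
    }

    static bool barrier_drop_initial(void)     /* May itself be the last. */
    {
        return atomic_fetch_sub(&barrier_cpu_count, 1) == 1;
    }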
--- kernel/rcutree.c | 25 ++++++++++++++----------- kernel/rcutree.h | 1 + 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1e552598b55..5929b021666 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -157,7 +157,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ -static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; @@ -2270,9 +2269,12 @@ static int rcu_cpu_has_callbacks(int cpu) * RCU callback function for _rcu_barrier(). If we are last, wake * up the task executing _rcu_barrier(). */ -static void rcu_barrier_callback(struct rcu_head *notused) +static void rcu_barrier_callback(struct rcu_head *rhp) { - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); + struct rcu_state *rsp = rdp->rsp; + + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); } @@ -2284,7 +2286,7 @@ static void rcu_barrier_func(void *type) struct rcu_state *rsp = type; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); rsp->call(&rdp->barrier_head, rcu_barrier_callback); } @@ -2297,9 +2299,9 @@ static void _rcu_barrier(struct rcu_state *rsp) int cpu; unsigned long flags; struct rcu_data *rdp; - struct rcu_head rh; + struct rcu_data rd; - init_rcu_head_on_stack(&rh); + init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rcu_barrier_mutex); @@ -2324,7 +2326,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * us -- but before CPU 1's orphaned callbacks are invoked!!! */ init_completion(&rcu_barrier_completion); - atomic_set(&rcu_barrier_cpu_count, 1); + atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -2363,15 +2365,16 @@ static void _rcu_barrier(struct rcu_state *rsp) rcu_adopt_orphan_cbs(rsp); rsp->rcu_barrier_in_progress = NULL; raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rcu_barrier_cpu_count); + atomic_inc(&rsp->barrier_cpu_count); smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - rsp->call(&rh, rcu_barrier_callback); + rd.rsp = rsp; + rsp->call(&rd.barrier_head, rcu_barrier_callback); /* * Now that we have an rcu_barrier_callback() callback on each * CPU, and thus each counted, remove the initial count. */ - if (atomic_dec_and_test(&rcu_barrier_cpu_count)) + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rcu_barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ @@ -2380,7 +2383,7 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); - destroy_rcu_head_on_stack(&rh); + destroy_rcu_head_on_stack(&rd.barrier_head); } /** diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 586d93c978f..c57ef0b7f09 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + atomic_t barrier_cpu_count; /* # CPUs waiting on. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states.
*/ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3-70-g09d2 From 7db74df88b52844f4e966901e2972bba725e6766 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 03:03:37 -0700 Subject: rcu: Move rcu_barrier_completion to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_completion global variable to a new ->barrier_completion field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 9 ++++----- kernel/rcutree.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5929b021666..ca7d1678ac7 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -158,7 +158,6 @@ unsigned long rcutorture_vernum; /* State information for rcu_barrier() and friends. */ static DEFINE_MUTEX(rcu_barrier_mutex); -static struct completion rcu_barrier_completion; /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -2275,7 +2274,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) struct rcu_state *rsp = rdp->rsp; if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); } /* @@ -2325,7 +2324,7 @@ static void _rcu_barrier(struct rcu_state *rsp) * 6. Both rcu_barrier_callback() callbacks are invoked, awakening * us -- but before CPU 1's orphaned callbacks are invoked!!! */ - init_completion(&rcu_barrier_completion); + init_completion(&rsp->barrier_completion); atomic_set(&rsp->barrier_cpu_count, 1); raw_spin_lock_irqsave(&rsp->onofflock, flags); rsp->rcu_barrier_in_progress = current; @@ -2375,10 +2374,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * CPU, and thus each counted, remove the initial count. */ if (atomic_dec_and_test(&rsp->barrier_cpu_count)) - complete(&rcu_barrier_completion); + complete(&rsp->barrier_completion); /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ - wait_for_completion(&rcu_barrier_completion); + wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_barrier_mutex); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c57ef0b7f09..d1ca4424122 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -401,6 +401,7 @@ struct rcu_state { /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ + struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3-70-g09d2 From 7be7f0be907224445acc62b3884c892f38b7ff40 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 May 2012 05:18:53 -0700 Subject: rcu: Move rcu_barrier_mutex to rcu_state structure In order to allow each RCU flavor to concurrently execute its rcu_barrier() function, it is necessary to move the relevant state to the rcu_state structure. This commit therefore moves the rcu_barrier_mutex global variable to a new ->barrier_mutex field in the rcu_state structure. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
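McKenney

Giving each flavor its own statically initialized mutex means that, once the rest of this series lands, an rcu_barrier_bh() no longer serializes behind an in-progress rcu_barrier_sched(). A pthreads stand-in for the pattern (illustrative only; the kernel uses struct mutex and __MUTEX_INITIALIZER()):

    #include <pthread.h>

    struct flavor_state {
        const char *name;
        pthread_mutex_t barrier_mutex;  /* Guards only this flavor's barrier. */
    };

    #define FLAVOR_INITIALIZER(n) \
        { .name = #n, .barrier_mutex = PTHREAD_MUTEX_INITIALIZER }

    static struct flavor_state sched_flavor = FLAVOR_INITIALIZER(rcu_sched);
    static struct flavor_state bh_flavor = FLAVOR_INITIALIZER(rcu_bh);

    static void flavor_barrier(struct flavor_state *fsp)
    {
        pthread_mutex_lock(&fsp->barrier_mutex);
        /* ... this flavor's barrier work ... */
        pthread_mutex_unlock(&fsp->barrier_mutex);
    }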
--- kernel/rcutree.c | 9 +++------ kernel/rcutree.h | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ca7d1678ac7..ff992ac24e7 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -71,6 +71,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \ .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ + .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ @@ -155,10 +156,6 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); unsigned long rcutorture_testseq; unsigned long rcutorture_vernum; -/* State information for rcu_barrier() and friends. */ - -static DEFINE_MUTEX(rcu_barrier_mutex); - /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -2303,7 +2300,7 @@ static void _rcu_barrier(struct rcu_state *rsp) init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ - mutex_lock(&rcu_barrier_mutex); + mutex_lock(&rsp->barrier_mutex); smp_mb(); /* Prevent any prior operations from leaking in. */ @@ -2380,7 +2377,7 @@ static void _rcu_barrier(struct rcu_state *rsp) wait_for_completion(&rsp->barrier_completion); /* Other rcu_barrier() invocations can now safely proceed. */ - mutex_unlock(&rcu_barrier_mutex); + mutex_unlock(&rsp->barrier_mutex); destroy_rcu_head_on_stack(&rd.barrier_head); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d1ca4424122..7641aec3e59 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -400,6 +400,7 @@ struct rcu_state { struct task_struct *rcu_barrier_in_progress; /* Task doing rcu_barrier(), */ /* or NULL if no barrier. */ + struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3-70-g09d2 From cfed0a85dad921c683e9c0d25b072bcc5745ede0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 15 Jun 2012 18:22:05 -0700 Subject: rcu: Remove needless initialization For global variables, C defaults all fields to zero. The initialization of the rcu_state structure's ->n_force_qs and ->n_force_qs_ngp fields is therefore redundant, so this commit removes these initializations. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ff992ac24e7..44a8fda9be8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -73,8 +73,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ - .n_force_qs = 0, \ - .n_force_qs_ngp = 0, \ .name = #sname, \ } -- cgit v1.2.3-70-g09d2 From cf3a9c4842b1e097dbe0854933c471d43dd24f69 Mon Sep 17 00:00:00 2001 From: "Paul E.
McKenney" Date: Tue, 29 May 2012 14:56:46 -0700 Subject: rcu: Increase rcu_barrier() concurrency The traditional rcu_barrier() implementation has serialized all requests, regardless of RCU flavor, and also does not coalesce concurrent requests. In the past, this has been good and sufficient. However, systems are getting larger and use of rcu_barrier() has been increasing. This commit therefore introduces a counter-based scheme that allows _rcu_barrier() calls for the same flavor of RCU to take advantage of each others' work. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 36 +++++++++++++++++++++++++++++++++++- kernel/rcutree.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 44a8fda9be8..6bb5d562253 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2294,13 +2294,41 @@ static void _rcu_barrier(struct rcu_state *rsp) unsigned long flags; struct rcu_data *rdp; struct rcu_data rd; + unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); + unsigned long snap_done; init_rcu_head_on_stack(&rd.barrier_head); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rsp->barrier_mutex); - smp_mb(); /* Prevent any prior operations from leaking in. */ + /* + * Ensure that all prior references, including to ->n_barrier_done, + * are ordered before the _rcu_barrier() machinery. + */ + smp_mb(); /* See above block comment. */ + + /* + * Recheck ->n_barrier_done to see if others did our work for us. + * This means checking ->n_barrier_done for an even-to-odd-to-even + * transition. The "if" expression below therefore rounds the old + * value up to the next even number and adds two before comparing. + */ + snap_done = ACCESS_ONCE(rsp->n_barrier_done); + if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + smp_mb(); /* caller's subsequent code after above check. */ + mutex_unlock(&rsp->barrier_mutex); + return; + } + + /* + * Increment ->n_barrier_done to avoid duplicate work. Use + * ACCESS_ONCE() to prevent the compiler from speculating + * the increment to precede the early-exit check. + */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* * Initialize the count to one rather than to zero in order to @@ -2371,6 +2399,12 @@ static void _rcu_barrier(struct rcu_state *rsp) if (atomic_dec_and_test(&rsp->barrier_cpu_count)) complete(&rsp->barrier_completion); + /* Increment ->n_barrier_done to prevent duplicate work. */ + smp_mb(); /* Keep increment after above mechanism. */ + ACCESS_ONCE(rsp->n_barrier_done)++; + WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + smp_mb(); /* Keep increment before caller's subsequent code. */ + /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ wait_for_completion(&rsp->barrier_completion); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7641aec3e59..be10286ad38 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -403,6 +403,8 @@ struct rcu_state { struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ + unsigned long n_barrier_done; /* ++ at start and end of */ + /* _rcu_barrier(). */ raw_spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. 
*/ unsigned long jiffies_force_qs; /* Time at which to invoke */ -- cgit v1.2.3-70-g09d2 From a83eff0a82a7f3f14fea477fd41e6c082e7fc96a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 May 2012 18:47:05 -0700 Subject: rcu: Add tracing for _rcu_barrier() This commit adds event tracing for _rcu_barrier() execution. This is defined only if RCU_TRACE=y. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d274734b2aa..5bde94d8585 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -541,6 +541,50 @@ TRACE_EVENT(rcu_torture_read, __entry->rcutorturename, __entry->rhp) ); +/* + * Tracepoint for _rcu_barrier() execution. The string "s" describes + * the _rcu_barrier() phase: + * "Begin": _rcu_barrier() started. + * "Check": _rcu_barrier() checking for piggybacking. + * "EarlyExit": _rcu_barrier() piggybacked, thus early exit. + * "Inc1": _rcu_barrier() piggyback check counter incremented. + * "Offline": _rcu_barrier() found offline CPU. + * "OnlineQ": _rcu_barrier() found online CPU with callbacks. + * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks. + * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. + * "LastCB": An rcu_barrier_callback() invoked the last callback. + * "Inc2": _rcu_barrier() piggyback check counter incremented. + * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument + * is the count of remaining callbacks, and "done" is the piggybacking count. + */ +TRACE_EVENT(rcu_barrier, + + TP_PROTO(char *rcuname, char *s, int cpu, int cnt, unsigned long done), + + TP_ARGS(rcuname, s, cpu, cnt, done), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(char *, s) + __field(int, cpu) + __field(int, cnt) + __field(unsigned long, done) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->s = s; + __entry->cpu = cpu; + __entry->cnt = cnt; + __entry->done = done; + ), + + TP_printk("%s %s cpu %d remaining %d # %lu", + __entry->rcuname, __entry->s, __entry->cpu, __entry->cnt, + __entry->done) ); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -564,6 +608,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6bb5d562253..dda43d82650 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2259,6 +2259,17 @@ static int rcu_cpu_has_callbacks(int cpu) rcu_preempt_cpu_has_callbacks(cpu); } +/* + * Helper function for _rcu_barrier() tracing. If tracing is disabled, + * the compiler is expected to optimize this away. + */ +static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, + int cpu, unsigned long done) +{ + trace_rcu_barrier(rsp->name, s, cpu, + atomic_read(&rsp->barrier_cpu_count), done); +} + /* * RCU callback function for _rcu_barrier().
If we are last, wake * up the task executing _rcu_barrier(). @@ -2268,8 +2279,12 @@ static void rcu_barrier_callback(struct rcu_head *rhp) struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); struct rcu_state *rsp = rdp->rsp; - if (atomic_dec_and_test(&rsp->barrier_cpu_count)) + if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { + _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); complete(&rsp->barrier_completion); + } else { + _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); + } } /* @@ -2280,6 +2295,7 @@ static void rcu_barrier_func(void *type) struct rcu_state *rsp = type; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); + _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); rsp->call(&rdp->barrier_head, rcu_barrier_callback); } @@ -2298,6 +2314,7 @@ static void _rcu_barrier(struct rcu_state *rsp) unsigned long snap_done; init_rcu_head_on_stack(&rd.barrier_head); + _rcu_barrier_trace(rsp, "Begin", -1, snap); /* Take mutex to serialize concurrent rcu_barrier() requests. */ mutex_lock(&rsp->barrier_mutex); @@ -2315,7 +2332,9 @@ static void _rcu_barrier(struct rcu_state *rsp) * value up to the next even number and adds two before comparing. */ snap_done = ACCESS_ONCE(rsp->n_barrier_done); + _rcu_barrier_trace(rsp, "Check", -1, snap_done); if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { + _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rsp->barrier_mutex); return; @@ -2328,6 +2347,7 @@ static void _rcu_barrier(struct rcu_state *rsp) */ ACCESS_ONCE(rsp->n_barrier_done)++; WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); + _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ /* @@ -2364,13 +2384,19 @@ static void _rcu_barrier(struct rcu_state *rsp) preempt_disable(); rdp = per_cpu_ptr(rsp->rda, cpu); if (cpu_is_offline(cpu)) { + _rcu_barrier_trace(rsp, "Offline", cpu, + rsp->n_barrier_done); preempt_enable(); while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) schedule_timeout_interruptible(1); } else if (ACCESS_ONCE(rdp->qlen)) { + _rcu_barrier_trace(rsp, "OnlineQ", cpu, + rsp->n_barrier_done); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); preempt_enable(); } else { + _rcu_barrier_trace(rsp, "OnlineNQ", cpu, + rsp->n_barrier_done); preempt_enable(); } } @@ -2403,6 +2429,7 @@ static void _rcu_barrier(struct rcu_state *rsp) smp_mb(); /* Keep increment after above mechanism. */ ACCESS_ONCE(rsp->n_barrier_done)++; WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); + _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); smp_mb(); /* Keep increment before caller's subsequent code. */ /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ -- cgit v1.2.3-70-g09d2 From 1bca8cf1a2c3c6683b12ad28a3e826ca7a834978 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 09:40:38 -0700 Subject: rcu: Remove unneeded __rcu_process_callbacks() argument With the advent of __this_cpu_ptr(), it is no longer necessary to pass both the rcu_state and rcu_data structures into __rcu_process_callbacks(). This commit therefore computes the rcu_data pointer from the rcu_state pointer within __rcu_process_callbacks() so that callers can pass in only the pointer to the rcu_state structure. This paves the way for linking the rcu_state structures together and iterating over them. Signed-off-by: Paul E. 
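McKenney Reviewed-by: Josh Triplett

The simplification rests on the fact that rsp->rda plus the current CPU identifies the per-CPU rcu_data uniquely, so one pointer can be derived from the other. An illustrative user-space reduction (array indexing standing in for __this_cpu_ptr(), simplified types not drawn from the kernel):

    struct rcu_data_sketch {
        int qlen;
        /* ... */
    };

    struct rcu_state_sketch {
        struct rcu_data_sketch *rda;   /* Base of per-CPU instances. */
    };

    static int this_cpu_id(void)       /* Stand-in for smp_processor_id(). */
    {
        return 0;
    }

    static void process_callbacks(struct rcu_state_sketch *rsp)
    {
        /* Derive the per-CPU data instead of taking it as an argument. */
        struct rcu_data_sketch *rdp = &rsp->rda[this_cpu_id()];

        rdp->qlen = 0;                 /* ... act on rdp ... */
    }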
--- kernel/rcutree.c | 8 ++++---- kernel/rcutree_plugin.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dda43d82650..5376a156be8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1788,9 +1788,10 @@ unlock_fqs_ret: * whom the rdp belongs. */ static void -__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +__rcu_process_callbacks(struct rcu_state *rsp) { unsigned long flags; + struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); WARN_ON_ONCE(rdp->beenonline == 0); @@ -1827,9 +1828,8 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) static void rcu_process_callbacks(struct softirq_action *unused) { trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state, - &__get_cpu_var(rcu_sched_data)); - __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + __rcu_process_callbacks(&rcu_sched_state); + __rcu_process_callbacks(&rcu_bh_state); rcu_preempt_process_callbacks(); trace_rcu_utilization("End RCU core"); } diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 9cb3a68819f..5a80cdd9a0a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -687,8 +687,7 @@ static void rcu_preempt_check_callbacks(int cpu) */ static void rcu_preempt_process_callbacks(void) { - __rcu_process_callbacks(&rcu_preempt_state, - &__get_cpu_var(rcu_preempt_data)); + __rcu_process_callbacks(&rcu_preempt_state); } #ifdef CONFIG_RCU_BOOST -- cgit v1.2.3-70-g09d2 From 6ce75a2326e6f8b3bdfb60e1de7934b89858e87b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 11:01:13 -0700 Subject: rcu: Introduce for_each_rcu_flavor() and use it The arrival of TREE_PREEMPT_RCU some years back included some ugly code involving either #ifdef or #ifdef'ed wrapper functions to iterate over all non-SRCU flavors of RCU. This commit therefore introduces a for_each_rcu_flavor() iterator over the rcu_state structures for each flavor of RCU to clean up a bit of the ugliness. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 53 +++++++++++++--------- kernel/rcutree.h | 12 +++-- kernel/rcutree_plugin.h | 116 ------------------------------------------------ 3 files changed, 37 insertions(+), 144 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 5376a156be8..b61c3ffc80e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -84,6 +84,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; +LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time.
*/ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; @@ -860,9 +861,10 @@ static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) */ void rcu_cpu_stall_reset(void) { - rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; - rcu_preempt_stall_reset(); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rsp->jiffies_stall = jiffies + ULONG_MAX / 2; } static struct notifier_block rcu_panic_block = { @@ -1827,10 +1829,11 @@ __rcu_process_callbacks(struct rcu_state *rsp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + struct rcu_state *rsp; + trace_rcu_utilization("Start RCU core"); - __rcu_process_callbacks(&rcu_sched_state); - __rcu_process_callbacks(&rcu_bh_state); - rcu_preempt_process_callbacks(); + for_each_rcu_flavor(rsp) + __rcu_process_callbacks(rsp); trace_rcu_utilization("End RCU core"); } @@ -2241,9 +2244,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ static int rcu_pending(int cpu) { - return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_pending(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) + return 1; + return 0; } /* @@ -2253,10 +2259,13 @@ static int rcu_pending(int cpu) */ static int rcu_cpu_has_callbacks(int cpu) { + struct rcu_state *rsp; + /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist || - rcu_preempt_cpu_has_callbacks(cpu); + for_each_rcu_flavor(rsp) + if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) + return 1; + return 0; } /* @@ -2551,9 +2560,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) static void __cpuinit rcu_prepare_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state, 0); - rcu_init_percpu_data(cpu, &rcu_bh_state, 0); - rcu_preempt_init_percpu_data(cpu); + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_init_percpu_data(cpu, rsp, + strcmp(rsp->name, "rcu_preempt") == 0); } /* @@ -2565,6 +2576,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + struct rcu_state *rsp; trace_rcu_utilization("Start CPU hotplug"); switch (action) { @@ -2589,18 +2601,16 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, * touch any data without introducing corruption. We send the * dying CPU's callbacks to an arbitrarily chosen online CPU. 
*/ - rcu_cleanup_dying_cpu(&rcu_bh_state); - rcu_cleanup_dying_cpu(&rcu_sched_state); - rcu_preempt_cleanup_dying_cpu(); + for_each_rcu_flavor(rsp) + rcu_cleanup_dying_cpu(rsp); rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - rcu_cleanup_dead_cpu(cpu, &rcu_bh_state); - rcu_cleanup_dead_cpu(cpu, &rcu_sched_state); - rcu_preempt_cleanup_dead_cpu(cpu); + for_each_rcu_flavor(rsp) + rcu_cleanup_dead_cpu(cpu, rsp); break; default: break; @@ -2717,6 +2727,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, per_cpu_ptr(rsp->rda, i)->mynode = rnp; rcu_boot_init_percpu_data(i, rsp); } + list_add(&rsp->flavors, &rcu_struct_flavors); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index be10286ad38..b92c4550a6e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -422,8 +422,13 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + struct list_head flavors; /* List of RCU flavors. */ }; +extern struct list_head rcu_struct_flavors; +#define for_each_rcu_flavor(rsp) \ + list_for_each_entry((rsp), &rcu_struct_flavors, flavors) + /* Return values for rcu_preempt_offline_tasks(). */ #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ @@ -466,25 +471,18 @@ static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); -static void rcu_preempt_stall_reset(void); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static int rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_preempt_cleanup_dead_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); -static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ -static int rcu_preempt_pending(int cpu); -static int rcu_preempt_cpu_has_callbacks(int cpu); -static void __cpuinit rcu_preempt_init_percpu_data(int cpu); -static void rcu_preempt_cleanup_dying_cpu(void); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5a80cdd9a0a..d18b4d383af 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -544,16 +544,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return ndetected; } -/* - * Suppress preemptible RCU's CPU stall warnings by pushing the - * time of the next stall-warning message comfortably far into the - * future. - */ -static void rcu_preempt_stall_reset(void) -{ - rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; -} - /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -654,14 +644,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Do CPU-offline processing for preemptible RCU. 
- */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ - rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state); -} - /* * Check for a quiescent state from the current CPU. When a task blocks, * the task is recorded in the corresponding CPU's rcu_node structure, @@ -682,14 +664,6 @@ static void rcu_preempt_check_callbacks(int cpu) t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; } -/* - * Process callbacks for preemptible RCU. - */ -static void rcu_preempt_process_callbacks(void) -{ - __rcu_process_callbacks(&rcu_preempt_state); -} - #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void) @@ -921,24 +895,6 @@ mb_ret: } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -/* - * Check to see if there is any immediate preemptible-RCU-related work - * to be done. - */ -static int rcu_preempt_pending(int cpu) -{ - return __rcu_pending(&rcu_preempt_state, - &per_cpu(rcu_preempt_data, cpu)); -} - -/* - * Does preemptible RCU have callbacks on this CPU? - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return !!per_cpu(rcu_preempt_data, cpu).nxtlist; -} - /** * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. */ @@ -948,23 +904,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Initialize preemptible RCU's per-CPU data. - */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ - rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); -} - -/* - * Move preemptible RCU's callbacks from dying CPU to other online CPU - * and record a quiescent state. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ - rcu_cleanup_dying_cpu(&rcu_preempt_state); -} - /* * Initialize preemptible RCU's state structures. */ @@ -1049,14 +988,6 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } -/* - * Because preemptible RCU does not exist, there is no need to suppress - * its CPU stall warnings. - */ -static void rcu_preempt_stall_reset(void) -{ -} - /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for @@ -1084,14 +1015,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never needs CPU-offline - * processing. - */ -static void rcu_preempt_cleanup_dead_cpu(int cpu) -{ -} - /* * Because preemptible RCU does not exist, it never has any callbacks * to check. @@ -1100,14 +1023,6 @@ static void rcu_preempt_check_callbacks(int cpu) { } -/* - * Because preemptible RCU does not exist, it never has any callbacks - * to process. - */ -static void rcu_preempt_process_callbacks(void) -{ -} - /* * Queue an RCU callback for lazy invocation after a grace period. * This will likely be later named something like "call_rcu_lazy()", @@ -1148,22 +1063,6 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -/* - * Because preemptible RCU does not exist, it never has any work to do. - */ -static int rcu_preempt_pending(int cpu) -{ - return 0; -} - -/* - * Because preemptible RCU does not exist, it never has callbacks - */ -static int rcu_preempt_cpu_has_callbacks(int cpu) -{ - return 0; -} - /* * Because preemptible RCU does not exist, rcu_barrier() is just * another name for rcu_barrier_sched(). @@ -1174,21 +1073,6 @@ void rcu_barrier(void) } EXPORT_SYMBOL_GPL(rcu_barrier); -/* - * Because preemptible RCU does not exist, there is no per-CPU - * data to initialize. 
- */ -static void __cpuinit rcu_preempt_init_percpu_data(int cpu) -{ -} - -/* - * Because there is no preemptible RCU, there is no cleanup to do. - */ -static void rcu_preempt_cleanup_dying_cpu(void) -{ -} - /* * Because preemptible RCU does not exist, it need not be initialized. */ -- cgit v1.2.3-70-g09d2 From ff015030c939f0bec68fa9b8898da3aaa7fe55ea Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jun 2012 13:16:58 -0700 Subject: rcu: RCU_SAVE_DYNTICK code no longer ever dead Before RCU had unified idle, the RCU_SAVE_DYNTICK leg of the switch statement in force_quiescent_state() was dead code for CONFIG_NO_HZ=n kernel builds. With unified idle, the code is never dead. This commit therefore removes the "if" statement designed to make gcc aware of when the code was and was not dead. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b61c3ffc80e..967b4bed2cf 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1747,8 +1747,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) - break; /* So gcc recognizes the dead code. */ raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ -- cgit v1.2.3-70-g09d2 From 285fe29481d865ae381ad3924c80894e6968c2d8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 08:45:12 -0700 Subject: rcu: Fix detection of abruptly-ending stall The code that attempts to identify stalls that end just as we detect them is broken by both flavors of initialization failure. This commit therefore properly initializes and computes the count of the number of reasons why the RCU grace period is stalled. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..dc8c5284fe0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -733,7 +733,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) int cpu; long delta; unsigned long flags; - int ndetected; + int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); /* Only let one CPU complain about others per time interval. */ @@ -774,7 +774,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) */ rnp = rcu_get_root(rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - ndetected = rcu_print_task_stall(rnp); + ndetected += rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); print_cpu_stall_info_end(); -- cgit v1.2.3-70-g09d2 From 3f5d3ea64f1783f0d4ea0d35890ae3297f045a8b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:39:56 -0700 Subject: rcu: Consolidate duplicate callback-list initialization There are a couple of open-coded initializations of the rcu_data structure's RCU callback list. This commit therefore consolidates them into a new init_callback_list() function. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
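McKenney Reviewed-by: Josh Triplett

The list being initialized here is RCU's tail-pointer ("segmented") callback list: when empty, every element of ->nxttail[] points back at ->nxtlist itself, which is what makes appending work in O(1) without special-casing the empty list. A reduced single-segment sketch of the invariant (the kernel keeps RCU_NEXT_SIZE tail pointers, one per grace-period stage):

    struct cb {
        struct cb *next;
    };

    struct cb_list {
        struct cb *head;
        struct cb **tail;        /* Points at head while the list is empty. */
    };

    static void init_cb_list(struct cb_list *clp)   /* cf. init_callback_list() */
    {
        clp->head = NULL;
        clp->tail = &clp->head;  /* Mirrors nxttail[i] = &rdp->nxtlist. */
    }

    static void enqueue_cb(struct cb_list *clp, struct cb *cbp)
    {
        cbp->next = NULL;
        *clp->tail = cbp;        /* O(1) append through the tail pointer. */
        clp->tail = &cbp->next;
    }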
--- kernel/rcutree.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index dc8c5284fe0..81e0394e46a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -936,6 +936,18 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) return ret; } +/* + * Initialize the specified rcu_data structure's callback list to empty. + */ +static void init_callback_list(struct rcu_data *rdp) +{ + int i; + + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; +} + /* * Advance this CPU's callbacks, but only if the current grace period * has ended. This may be called only from the CPU to whom the rdp @@ -1328,8 +1340,6 @@ static void rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { - int i; - /* * Orphan the callbacks. First adjust the counts. This is safe * because ->onofflock excludes _rcu_barrier()'s adoption of @@ -1369,9 +1379,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, } /* Finally, initialize the rcu_data structure's list to empty. */ - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); } /* @@ -2407,16 +2415,13 @@ static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; - int i; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; + init_callback_list(rdp); rdp->qlen_lazy = 0; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); -- cgit v1.2.3-70-g09d2 From 1d1fb395f6dbc07b36285bbedcf01a73b57f7cb5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:44:42 -0700 Subject: rcu: Add ACCESS_ONCE() to ->qlen accesses The _rcu_barrier() function accesses other CPUs' rcu_data structure's ->qlen field without benefit of locking. This commit therefore adds the required ACCESS_ONCE() wrappers around accesses and updates that need it. ACCESS_ONCE() is not needed when a CPU accesses its own ->qlen, or in code that cannot run while _rcu_barrier() is sampling ->qlen fields. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 81e0394e46a..89addada3e3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1350,7 +1350,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, rsp->qlen += rdp->qlen; rdp->n_cbs_orphaned += rdp->qlen; rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; } /* @@ -1600,7 +1600,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } smp_mb(); /* List handling before counting for rcu_barrier(). */ rdp->qlen_lazy -= count_lazy; - rdp->qlen -= count; + ACCESS_ONCE(rdp->qlen) -= count; rdp->n_cbs_invoked += count; /* Reinstate batch limit if we have worked down the excess. */ @@ -1889,7 +1889,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list.
*/ - rdp->qlen++; + ACCESS_ONCE(rdp->qlen)++; if (lazy) rdp->qlen_lazy++; else @@ -2423,7 +2423,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); init_callback_list(rdp); rdp->qlen_lazy = 0; - rdp->qlen = 0; + ACCESS_ONCE(rdp->qlen) = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); -- cgit v1.2.3-70-g09d2 From 62fde6edf12b60fddb13a3f0a779c8be0bb7447e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 May 2012 22:10:24 -0700 Subject: rcu: Make __call_rcu() handle invocation from idle Although __call_rcu() is handled correctly when called from a momentary non-idle period, if it is called on a CPU that RCU believes to be idle on RCU_FAST_NO_HZ kernels, the callback might be indefinitely postponed. This commit therefore ensures that RCU is aware of the new callback and has a chance to force the CPU out of dyntick-idle mode when a new callback is posted. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 13 ++++--------- kernel/rcutree.c | 15 +++++++++------ 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 31568c73452..26f6417f026 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -256,6 +256,10 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) +extern int rcu_is_cpu_idle(void); +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ + #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ @@ -267,15 +271,6 @@ static inline bool rcu_lockdep_current_cpu_online(void) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#ifdef CONFIG_PROVE_RCU -extern int rcu_is_cpu_idle(void); -#else /* !CONFIG_PROVE_RCU */ -static inline int rcu_is_cpu_idle(void) -{ - return 0; -} -#endif /* else !CONFIG_PROVE_RCU */ - static inline void rcu_lock_acquire(struct lockdep_map *map) { lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89addada3e3..a4a9c916ad3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -585,8 +585,6 @@ void rcu_nmi_exit(void) WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } -#ifdef CONFIG_PROVE_RCU - /** * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle * @@ -604,7 +602,7 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* * Is the current CPU online? 
Disable preemption to avoid false positives @@ -645,9 +643,7 @@ bool rcu_lockdep_current_cpu_online(void) } EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -#endif /* #ifdef CONFIG_PROVE_RCU */ +#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ /** * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle @@ -1904,6 +1900,13 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), else trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); + /* + * If called from an extended quiescent state, invoke the RCU + * core in order to force a re-evaluation of RCU's idleness. + */ + if (rcu_is_cpu_idle()) + invoke_rcu_core(); + /* If interrupts were disabled, don't dive into RCU core. */ if (irqs_disabled_flags(flags)) { local_irq_restore(flags); -- cgit v1.2.3-70-g09d2 From a16b7a693430406dc229ab0c6b154f669a2031c5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 26 May 2012 08:56:01 -0700 Subject: rcu: Prevent __call_rcu() from invoking RCU core on offline CPUs The __call_rcu() function will invoke the RCU core, for example, if it detects that the current CPU has too many callbacks. However, this can happen on an offline CPU that is on its way to the idle loop, in which case it is an error to invoke the RCU core, and the excess callbacks will be adopted in any case. This commit therefore adds checks to __call_rcu() for running on an offline CPU, refraining from invoking the RCU core in this case. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a4a9c916ad3..ceaa95923a8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1904,11 +1904,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (rcu_is_cpu_idle()) + if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) invoke_rcu_core(); - /* If interrupts were disabled, don't dive into RCU core. */ - if (irqs_disabled_flags(flags)) { + /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ + if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) { local_irq_restore(flags); return; } -- cgit v1.2.3-70-g09d2 From 29154c57e35a191c83b19c61b1935c9f21957662 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 May 2012 03:21:48 -0700 Subject: rcu: Split RCU core processing out of __call_rcu() The __call_rcu() function is a bit overweight, so this commit splits it into actual enqueuing of and accounting for the callback (__call_rcu()) and associated RCU-core processing (__call_rcu_core()). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 90 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 41 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ceaa95923a8..70c4da7d2a9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1861,45 +1861,12 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } -static void -__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), - struct rcu_state *rsp, bool lazy) +/* + * Handle any core-RCU processing required by a call_rcu() invocation. 
+ */ +static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, + struct rcu_head *head, unsigned long flags) { - unsigned long flags; - struct rcu_data *rdp; - - WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ - debug_rcu_head_queue(head); - head->func = func; - head->next = NULL; - - smp_mb(); /* Ensure RCU update seen before callback registry. */ - - /* - * Opportunistically note grace-period endings and beginnings. - * Note that we might see a beginning right after we see an - * end, but never vice versa, since this CPU has to pass through - * a quiescent state betweentimes. - */ - local_irq_save(flags); - rdp = this_cpu_ptr(rsp->rda); - - /* Add the callback to our list. */ - ACCESS_ONCE(rdp->qlen)++; - if (lazy) - rdp->qlen_lazy++; - else - rcu_idle_count_callbacks_posted(); - smp_mb(); /* Count before adding callback for rcu_barrier(). */ - *rdp->nxttail[RCU_NEXT_TAIL] = head; - rdp->nxttail[RCU_NEXT_TAIL] = &head->next; - - if (__is_kfree_rcu_offset((unsigned long)func)) - trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, - rdp->qlen_lazy, rdp->qlen); - else - trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); - /* * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. @@ -1908,10 +1875,8 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ - if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) { - local_irq_restore(flags); + if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) return; - } /* * Force the grace period if too many callbacks or too long waiting. @@ -1944,6 +1909,49 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), } } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) force_quiescent_state(rsp, 1); +} + +static void +__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp, bool lazy) +{ + unsigned long flags; + struct rcu_data *rdp; + + WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + smp_mb(); /* Ensure RCU update seen before callback registry. */ + + /* + * Opportunistically note grace-period endings and beginnings. + * Note that we might see a beginning right after we see an + * end, but never vice versa, since this CPU has to pass through + * a quiescent state betweentimes. + */ + local_irq_save(flags); + rdp = this_cpu_ptr(rsp->rda); + + /* Add the callback to our list. */ + ACCESS_ONCE(rdp->qlen)++; + if (lazy) + rdp->qlen_lazy++; + else + rcu_idle_count_callbacks_posted(); + smp_mb(); /* Count before adding callback for rcu_barrier(). */ + *rdp->nxttail[RCU_NEXT_TAIL] = head; + rdp->nxttail[RCU_NEXT_TAIL] = &head->next; + + if (__is_kfree_rcu_offset((unsigned long)func)) + trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, + rdp->qlen_lazy, rdp->qlen); + else + trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); + + /* Go handle any RCU core processing required. */ + __call_rcu_core(rsp, rdp, head, flags); local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From 95f0c1de3e6ed4383cc4b5f52ce4ecfb21026b49 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 19 Jun 2012 11:58:27 -0700 Subject: rcu: Disable preemption in rcu_blocking_is_gp() It is time to optimize CONFIG_TREE_PREEMPT_RCU's synchronize_rcu() for uniprocessor optimization, which means that rcu_blocking_is_gp() can no longer rely on RCU read-side critical sections having disabled preemption. This commit therefore disables preemption across rcu_blocking_is_gp()'s scan of the cpu_online_mask. (Updated from previous version to fix embarrassing bug spotted by Wu Fengguang.) Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 70c4da7d2a9..e000a623e63 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1981,28 +1981,16 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * occasionally incorrectly indicate that there are multiple CPUs online * when there was in fact only one the whole time, as this just adds * some overhead: RCU still operates correctly. - * - * Of course, sampling num_online_cpus() with preemption enabled can - * give erroneous results if there are concurrent CPU-hotplug operations. - * For example, given a demonic sequence of preemptions in num_online_cpus() - * and CPU-hotplug operations, there could be two or more CPUs online at - * all times, but num_online_cpus() might well return one (or even zero). - * - * However, all such demonic sequences require at least one CPU-offline - * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer - * is only a problem if there is an RCU read-side critical section executing - * throughout. But RCU-sched and RCU-bh read-side critical sections - * disable either preemption or bh, which prevents a CPU from going offline. - * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return - * that there is only one CPU when in fact there was more than one throughout - * is when there were no RCU readers in the system. If there are no - * RCU readers, the grace period by definition can be of zero length, - * regardless of the number of online CPUs. */ static inline int rcu_blocking_is_gp(void) { + int ret; + might_sleep(); /* Check for RCU read-side critical section. */ - return num_online_cpus() <= 1; + preempt_disable(); + ret = num_online_cpus() <= 1; + preempt_enable(); + return ret; } /** -- cgit v1.2.3-70-g09d2 From cf01537ecf192d9ff15c32a355db5d5af22bea4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Jun 2012 11:26:42 -0700 Subject: rcu: Add check for CPUs going offline with callbacks queued If a CPU goes offline with callbacks queued, those callbacks might be indefinitely postponed, which can result in a system hang. This commit therefore inserts warnings for this condition. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e000a623e63..95c7b61e77e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1509,6 +1509,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); if (need_report & RCU_OFL_TASKS_EXP_GP) rcu_report_exp_rnp(rsp, rnp, true); + WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, + "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", + cpu, rdp->qlen, rdp->nxtlist); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3-70-g09d2 From bf1304e9cd755be7814ac8365834b7a1d0f06b58 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 May 2012 15:55:39 -0700 Subject: rcu: Dump only the current CPU's buffers for idle-entry/exit warnings Problems in RCU idle entry and exit are almost always confined to the offending CPU. This commit therefore switches ftrace_dump() from DUMP_ALL to DUMP_ORIG. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- kernel/rcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4b97bba7396..6eb48f13eee 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -358,7 +358,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ @@ -468,7 +468,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) trace_rcu_dyntick("Error on exit: not idle task", oldval, rdtp->dynticks_nesting); - ftrace_dump(DUMP_ALL); + ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ -- cgit v1.2.3-70-g09d2 From cfca927972e31a5b3da49bf641c525732ff3c357 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Jun 2012 12:54:17 -0700 Subject: rcu: Introduce check for callback list/count mismatch The recent bug that introduced the RCU callback list/count mismatch showed the need for a diagnostic to check for this, which this commit adds. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 95c7b61e77e..4154c9567a6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1612,6 +1612,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->n_force_qs_snap = rsp->n_force_qs; } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = rdp->qlen; + WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); local_irq_restore(flags); -- cgit v1.2.3-70-g09d2 From c701d5d9b384ff03ceb232ef21236364d784a411 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 28 Jun 2012 08:08:25 -0700 Subject: rcu: Fix code-style issues involving "else" The Linux kernel coding style says that single-statement blocks should omit curly braces unless the other leg of the "if" statement has multiple statements, in which case the curly braces should be included. This commit fixes RCU's violations of this rule. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/rcutiny_plugin.h | 7 ++++--- kernel/rcutorture.c | 3 ++- kernel/rcutree.c | 7 ++++--- kernel/rcutree_plugin.h | 14 ++++++++------ 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 116725b5edf..918fd1e8509 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -350,8 +350,9 @@ static int rcu_initiate_boost(void) rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; invoke_rcu_callbacks(); - } else + } else { RCU_TRACE(rcu_initiate_boost_trace()); + } return 1; } @@ -778,9 +779,9 @@ void synchronize_rcu_expedited(void) rpcp->exp_tasks = NULL; /* Wait for tail of ->blkd_tasks list to drain. */ - if (!rcu_preempted_readers_exp()) + if (!rcu_preempted_readers_exp()) { local_irq_restore(flags); - else { + } else { rcu_initiate_boost(); local_irq_restore(flags); wait_event(sync_rcu_preempt_exp_wq, diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index c279ee92094..155fb129b64 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -408,8 +408,9 @@ rcu_torture_cb(struct rcu_head *p) if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { rp->rtort_mbtest = 0; rcu_torture_free(rp); - } else + } else { cur_ops->deferred_free(rp); + } } static int rcu_no_completed(void) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 117218a4372..f280e542e3e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -892,8 +892,9 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; rdp->passed_quiesce = 0; - } else + } else { rdp->qs_pending = 0; + } zero_cpu_stall_ticks(rdp); } } @@ -2130,9 +2131,9 @@ void synchronize_sched_expedited(void) put_online_cpus(); /* No joy, try again later. Or just synchronize_sched(). */ - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_sched(); return; } diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a9194d5606c..7f3244c0df0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -390,8 +390,9 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grphi, !!rnp->gp_tasks); rcu_report_unblock_qs_rnp(rnp, flags); - } else + } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); + } #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ @@ -757,9 +758,9 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) int must_wait = 0; raw_spin_lock_irqsave(&rnp->lock, flags); - if (list_empty(&rnp->blkd_tasks)) + if (list_empty(&rnp->blkd_tasks)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); - else { + } else { rnp->exp_tasks = rnp->blkd_tasks.next; rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ must_wait = 1; @@ -803,9 +804,9 @@ void synchronize_rcu_expedited(void) * expedited grace period for us, just leave. 
*/ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { - if (trycount++ < 10) + if (trycount++ < 10) { udelay(trycount * num_online_cpus()); - else { + } else { synchronize_rcu(); return; } @@ -2093,8 +2094,9 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_callbacks(cpu)) { trace_rcu_prep_idle("More callbacks"); invoke_rcu_core(); - } else + } else { trace_rcu_prep_idle("Callbacks drained"); + } } /* -- cgit v1.2.3-70-g09d2
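Taken together, the init_callback_list() consolidation, the tail-pointer enqueue performed by __call_rcu(), and the list/count-mismatch diagnostic in the patches above all manipulate one data structure: a singly linked callback list whose tail pointers point into the list itself, initially at the head pointer. The following self-contained sketch models that structure in userspace C; the four-segment layout and the pointer discipline follow the patches, but struct cb, the function names, and main() are invented for illustration.

#include <stdio.h>

struct cb {
        struct cb *next;
};

#define RCU_NEXT_SIZE 4                 /* Four callback segments, as in rcutree.h. */

struct cb_list {
        struct cb *head;                /* Models rdp->nxtlist. */
        struct cb **tail[RCU_NEXT_SIZE]; /* Models rdp->nxttail[]. */
        long qlen;                      /* Models rdp->qlen. */
};

/* Model of init_callback_list(): empty list, every tail points at ->head. */
static void init_cb_list(struct cb_list *l)
{
        int i;

        l->head = NULL;
        for (i = 0; i < RCU_NEXT_SIZE; i++)
                l->tail[i] = &l->head;
        l->qlen = 0;
}

/* Model of __call_rcu()'s enqueue: splice at the last segment's tail. */
static void enqueue_cb(struct cb_list *l, struct cb *c)
{
        c->next = NULL;
        *l->tail[RCU_NEXT_SIZE - 1] = c;
        l->tail[RCU_NEXT_SIZE - 1] = &c->next;
        l->qlen++;
}

/* Model of the list/count consistency check: empty list iff qlen is zero. */
static void check_cb_list(const struct cb_list *l)
{
        if ((l->head == NULL) != (l->qlen == 0))
                fprintf(stderr, "callback list/count mismatch: qlen=%ld\n",
                        l->qlen);
}

int main(void)
{
        struct cb_list l;
        struct cb a, b;

        init_cb_list(&l);
        check_cb_list(&l);              /* Empty list, qlen == 0: consistent. */
        enqueue_cb(&l, &a);
        enqueue_cb(&l, &b);
        check_cb_list(&l);              /* Two callbacks, qlen == 2: consistent. */
        printf("qlen=%ld\n", l.qlen);
        return 0;
}

The pointer-to-pointer tails are why the enqueue path needs no empty-list special case, and why initialization reduces to pointing every tail back at the head, which is what made a single init_callback_list() helper possible.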
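For the ->qlen patch above, it may help to see what ACCESS_ONCE() expands to. The macro below is the kernel's actual definition from <linux/compiler.h> of this era; the demo around it is invented. The volatile cast forces the compiler to emit exactly one load or one store per access, so a lockless sampler such as _rcu_barrier() cannot see a refetched, cached, or fused value. It does not make the increment atomic, which is why the commit notes that a CPU's own updates, or code excluded from concurrent sampling, need nothing stronger.

#include <stdio.h>

/* Actual definition from <linux/compiler.h>; the rest is a demo sketch. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

static long qlen;                       /* Models rdp->qlen. */

/* Updater side: ACCESS_ONCE(rdp->qlen)++ is one explicit load and one store. */
static void enqueue_one(void)
{
        ACCESS_ONCE(qlen)++;
}

/* Lockless sampler (models _rcu_barrier()): exactly one load, no refetch. */
static long sample_qlen(void)
{
        return ACCESS_ONCE(qlen);
}

int main(void)
{
        enqueue_one();
        printf("sampled qlen=%ld\n", sample_qlen());
        return 0;
}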
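The rcu_blocking_is_gp() change above follows a general shape: take the snapshot and compute the answer while the sampled quantity is held stable, rather than returning an expression whose input can change mid-evaluation. Userspace has no preempt_disable(), so the sketch below substitutes a mutex as a rough stand-in; every name in it is invented, and it illustrates only the snapshot-and-decide shape of the fix, not the kernel's actual CPU-hotplug interactions.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static int num_online = 1;              /* Models num_online_cpus(). */

/* Old shape: the input can change while the expression is evaluated. */
static int blocking_is_gp_unprotected(void)
{
        return num_online <= 1;
}

/* New shape: compute the result under protection, then return it. */
static int blocking_is_gp(void)
{
        int ret;

        pthread_mutex_lock(&hotplug_lock);      /* Stands in for preempt_disable(). */
        ret = num_online <= 1;
        pthread_mutex_unlock(&hotplug_lock);    /* Stands in for preempt_enable(). */
        return ret;
}

int main(void)
{
        printf("unprotected=%d protected=%d\n",
               blocking_is_gp_unprotected(), blocking_is_gp());
        return 0;
}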
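The offline-callbacks warning and the list/count-mismatch check above both rely on fire-at-most-once diagnostics. Below is a simplified userspace stand-in for WARN_ONCE()/WARN_ON_ONCE(): no stack dump, no taint flag, and the once-flag update is not race-free; only the one-shot reporting behavior that the patches depend on is modeled. It needs gcc-style statement expressions, as the kernel's own version does.

#include <stdio.h>

#define WARN_ONCE(cond, fmt, ...)                               \
({                                                              \
        static int __warned;                                    \
        int __ret = !!(cond);                                   \
                                                                \
        if (__ret && !__warned) {                               \
                __warned = 1;                                   \
                fprintf(stderr, fmt, ##__VA_ARGS__);            \
        }                                                       \
        __ret;                                                  \
})

#define WARN_ON_ONCE(cond) WARN_ONCE(cond, "warning: %s\n", #cond)

/* Models rcu_cleanup_dead_cpu()'s new check: one warning per call site. */
static void report_offline(int cpu, long qlen, void *nxtlist)
{
        WARN_ONCE(qlen != 0 || nxtlist != NULL,
                  "Callbacks on offline CPU %d: qlen=%ld\n", cpu, qlen);
}

int main(void)
{
        long qlen = 3;
        void *nxtlist = NULL;

        report_offline(7, qlen, nxtlist);       /* Fires. */
        report_offline(8, qlen, nxtlist);       /* Same call site: suppressed. */

        /* Models the list/count consistency diagnostic. */
        WARN_ON_ONCE((nxtlist == NULL) != (qlen == 0));
        return 0;
}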
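The final patch applies one rule from the kernel's coding style: a single-statement branch is left unbraced, unless the other branch of the same if/else requires braces, in which case both branches get them. A minimal invented illustration, compilable on its own:

#include <stdio.h>

/* Invented function; only the brace placement matters here. */
static void style_demo(int trycount)
{
        /*
         * One leg needs braces (two statements), so both legs get them,
         * matching the synchronize_sched_expedited() hunk above.
         */
        if (trycount < 10) {
                printf("retrying, attempt %d\n", trycount);
        } else {
                printf("giving up\n");
                return;
        }

        /* Both legs are single statements, so neither is braced. */
        if (trycount & 1)
                printf("odd\n");
        else
                printf("even\n");
}

int main(void)
{
        style_demo(3);
        style_demo(11);
        return 0;
}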