summaryrefslogtreecommitdiffstats
path: root/kernel/rcupdate.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcupdate.c')
-rw-r--r--kernel/rcupdate.c190
1 files changed, 124 insertions, 66 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a21e0..0cf8146bd58 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
+#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
@@ -45,26 +46,21 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
-#include <linux/rcuref.h>
#include <linux/cpu.h>
/* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
- { .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
- { .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
- spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */
- cpumask_t cpumask; /* CPUs that need to switch in order */
- /* for current batch to proceed. */
+struct rcu_ctrlblk rcu_ctrlblk = {
+ .cur = -300,
+ .completed = -300,
+ .lock = SPIN_LOCK_UNLOCKED,
+ .cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+ .cur = -300,
+ .completed = -300,
+ .lock = SPIN_LOCK_UNLOCKED,
+ .cpumask = CPU_MASK_NONE,
};
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
- {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
- {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -73,19 +69,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
static int maxbatch = 10000;
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
- [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
/**
* call_rcu - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -116,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
local_irq_restore(flags);
}
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
/**
* call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -162,6 +149,42 @@ long rcu_batches_completed(void)
return rcu_ctrlblk.completed;
}
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+ if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+ complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+ int cpu = smp_processor_id();
+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ struct rcu_head *head;
+
+ head = &rdp->barrier;
+ atomic_inc(&rcu_barrier_cpu_count);
+ call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+ BUG_ON(in_interrupt());
+ /* Take cpucontrol semaphore to protect against CPU hotplug */
+ down(&rcu_barrier_sema);
+ init_completion(&rcu_barrier_completion);
+ atomic_set(&rcu_barrier_cpu_count, 0);
+ on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+ wait_for_completion(&rcu_barrier_completion);
+ up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
/*
* Invoke the completed RCU callbacks. They are expected to be in
* a per-cpu list.
@@ -193,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
* This is done by rcu_start_batch. The start is not broadcasted to
* all cpus, they must pick this up by comparing rcp->cur with
* rdp->quiescbatch. All cpus are recorded in the
- * rcu_state.cpumask bitmap.
+ * rcu_ctrlblk.cpumask bitmap.
* - All cpus must go through a quiescent state.
* Since the start of the grace period is not broadcasted, at least two
* calls to rcu_check_quiescent_state are required:
* The first call just notices that a new grace period is running. The
* following calls check if there was a quiescent state since the beginning
- * of the grace period. If so, it updates rcu_state.cpumask. If
+ * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
* the bitmap is empty, then the grace period is completed.
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
* period (if necessary).
@@ -207,25 +230,29 @@ static void rcu_do_batch(struct rcu_data *rdp)
/*
* Register a new batch of callbacks, and start it up if there is currently no
* active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
*/
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
- int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
- if (next_pending)
- rcp->next_pending = 1;
-
if (rcp->next_pending &&
rcp->completed == rcp->cur) {
- /* Can't change, since spin lock held. */
- cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
-
rcp->next_pending = 0;
- /* next_pending == 0 must be visible in __rcu_process_callbacks()
- * before it can see new value of cur.
+ /*
+ * next_pending == 0 must be visible in
+ * __rcu_process_callbacks() before it can see new value of cur.
*/
smp_wmb();
rcp->cur++;
+
+ /*
+ * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
+ * Barrier Otherwise it can cause tickless idle CPUs to be
+ * included in rcp->cpumask, which will extend graceperiods
+ * unnecessarily.
+ */
+ smp_mb();
+ cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+
}
}
@@ -234,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
* Clear it from the cpu mask and complete the grace period if it was the last
* cpu. Start another grace period if someone has further entries pending
*/
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
- cpu_clear(cpu, rsp->cpumask);
- if (cpus_empty(rsp->cpumask)) {
+ cpu_clear(cpu, rcp->cpumask);
+ if (cpus_empty(rcp->cpumask)) {
/* batch completed ! */
rcp->completed = rcp->cur;
- rcu_start_batch(rcp, rsp, 0);
+ rcu_start_batch(rcp);
}
}
@@ -250,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
* quiescent cycle, then indicate that it has done so.
*/
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
- struct rcu_state *rsp, struct rcu_data *rdp)
+ struct rcu_data *rdp)
{
if (rdp->quiescbatch != rcp->cur) {
/* start new grace period: */
@@ -275,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
return;
rdp->qs_pending = 0;
- spin_lock(&rsp->lock);
+ spin_lock(&rcp->lock);
/*
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
* during cpu startup. Ignore the quiescent state.
*/
if (likely(rdp->quiescbatch == rcp->cur))
- cpu_quiet(rdp->cpu, rcp, rsp);
+ cpu_quiet(rdp->cpu, rcp);
- spin_unlock(&rsp->lock);
+ spin_unlock(&rcp->lock);
}
@@ -304,28 +331,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
}
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
- struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+ struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
/* if the cpu going offline owns the grace period
* we can block indefinitely waiting for it, so flush
* it here
*/
- spin_lock_bh(&rsp->lock);
+ spin_lock_bh(&rcp->lock);
if (rcp->cur != rcp->completed)
- cpu_quiet(rdp->cpu, rcp, rsp);
- spin_unlock_bh(&rsp->lock);
+ cpu_quiet(rdp->cpu, rcp);
+ spin_unlock_bh(&rcp->lock);
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+ rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
}
+
static void rcu_offline_cpu(int cpu)
{
struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
- __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+ __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
&per_cpu(rcu_data, cpu));
- __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+ __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
&per_cpu(rcu_bh_data, cpu));
put_cpu_var(rcu_data);
put_cpu_var(rcu_bh_data);
@@ -344,7 +372,7 @@ static void rcu_offline_cpu(int cpu)
* This does the RCU processing work from tasklet context.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
- struct rcu_state *rsp, struct rcu_data *rdp)
+ struct rcu_data *rdp)
{
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
*rdp->donetail = rdp->curlist;
@@ -374,24 +402,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
if (!rcp->next_pending) {
/* and start it/schedule start if it's a new batch */
- spin_lock(&rsp->lock);
- rcu_start_batch(rcp, rsp, 1);
- spin_unlock(&rsp->lock);
+ spin_lock(&rcp->lock);
+ rcp->next_pending = 1;
+ rcu_start_batch(rcp);
+ spin_unlock(&rcp->lock);
}
} else {
local_irq_enable();
}
- rcu_check_quiescent_state(rcp, rsp, rdp);
+ rcu_check_quiescent_state(rcp, rdp);
if (rdp->donelist)
rcu_do_batch(rdp);
}
static void rcu_process_callbacks(unsigned long unused)
{
- __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
- &__get_cpu_var(rcu_data));
- __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
- &__get_cpu_var(rcu_bh_data));
+ __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+ __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+ /* This cpu has pending rcu entries and the grace period
+ * for them has completed.
+ */
+ if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+ return 1;
+
+ /* This cpu has no pending entries, but there are new entries */
+ if (!rdp->curlist && rdp->nxtlist)
+ return 1;
+
+ /* This cpu has finished callbacks to invoke */
+ if (rdp->donelist)
+ return 1;
+
+ /* The rcu core waits for a quiescent state from the cpu */
+ if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+ return 1;
+
+ /* nothing to do */
+ return 0;
+}
+
+int rcu_pending(int cpu)
+{
+ return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+ __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
}
void rcu_check_callbacks(int cpu, int user)
@@ -457,6 +514,7 @@ static struct notifier_block __devinitdata rcu_nb = {
*/
void __init rcu_init(void)
{
+ sema_init(&rcu_barrier_sema, 1);
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
/* Register notifier for non-boot CPUs */