author     Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 08:02:40 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-06 08:02:40 -0800
commit     423d091dfe58d3109d84c408810a7cfa82f6f184 (patch)
tree       43c4385d1dc7219582f924d42db1f3e203a577bd /include/linux
parent     1483b3823542c9721eddf09a077af1e02ac96b50 (diff)
parent     919b83452b2e7c1dbced0456015508b4b9585db3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
  cpu: Export cpu_up()
  rcu: Apply ACCESS_ONCE() to rcu_boost() return value
  Revert "rcu: Permit rt_mutex_unlock() with irqs disabled"
  docs: Additional LWN links to RCU API
  rcu: Augment rcu_batch_end tracing for idle and callback state
  rcu: Add rcutorture tests for srcu_read_lock_raw()
  rcu: Make rcutorture test for hotpluggability before offlining CPUs
  driver-core/cpu: Expose hotpluggability to the rest of the kernel
  rcu: Remove redundant rcu_cpu_stall_suppress declaration
  rcu: Adaptive dyntick-idle preparation
  rcu: Keep invoking callbacks if CPU otherwise idle
  rcu: Irq nesting is always 0 on rcu_enter_idle_common
  rcu: Don't check irq nesting from rcu idle entry/exit
  rcu: Permit dyntick-idle with callbacks pending
  rcu: Document same-context read-side constraints
  rcu: Identify dyntick-idle CPUs on first force_quiescent_state() pass
  rcu: Remove dynticks false positives and RCU failures
  rcu: Reduce latency of rcu_prepare_for_idle()
  rcu: Eliminate RCU_FAST_NO_HZ grace-period hang
  rcu: Avoid needlessly IPIing CPUs at GP end
  ...
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/cpu.h       1
-rw-r--r--  include/linux/hardirq.h   21
-rw-r--r--  include/linux/rcupdate.h  115
-rw-r--r--  include/linux/sched.h     8
-rw-r--r--  include/linux/srcu.h      87
-rw-r--r--  include/linux/tick.h      11
6 files changed, 163 insertions, 80 deletions
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6cb60fd2ea8..305c263021e 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -27,6 +27,7 @@ struct cpu {
extern int register_cpu(struct cpu *cpu, int num);
extern struct sys_device *get_cpu_sysdev(unsigned cpu);
+extern bool cpu_is_hotpluggable(unsigned cpu);
extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr);
extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr);
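The newly exported cpu_is_hotpluggable() lets code outside the driver core ask whether a CPU can be taken offline before trying to do so, which is how the rcutorture change in this merge uses it. A minimal sketch, assuming CONFIG_HOTPLUG_CPU and with a hypothetical helper name:

#include <linux/cpu.h>

static void try_offline_one_cpu(unsigned int cpu)	/* hypothetical helper */
{
	if (!cpu_is_hotpluggable(cpu))
		return;				/* driver core says: not removable */
	if (cpu_down(cpu) != 0)			/* cpu_down() returns 0 on success */
		pr_warn("offline of CPU %u failed\n", cpu);
}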
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f743883f769..bb7f3097185 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
extern void account_system_vtime(struct task_struct *tsk);
#endif
-#if defined(CONFIG_NO_HZ)
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
-extern void rcu_enter_nohz(void);
-extern void rcu_exit_nohz(void);
-
-static inline void rcu_irq_enter(void)
-{
- rcu_exit_nohz();
-}
-
-static inline void rcu_irq_exit(void)
-{
- rcu_enter_nohz();
-}
static inline void rcu_nmi_enter(void)
{
@@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void)
}
#else
-extern void rcu_irq_enter(void);
-extern void rcu_irq_exit(void);
extern void rcu_nmi_enter(void);
extern void rcu_nmi_exit(void);
#endif
-#else
-# define rcu_irq_enter() do { } while (0)
-# define rcu_irq_exit() do { } while (0)
-# define rcu_nmi_enter() do { } while (0)
-# define rcu_nmi_exit() do { } while (0)
-#endif /* #if defined(CONFIG_NO_HZ) */
/*
* It is safe to do non-atomic ops on ->hardirq_context,
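With the CONFIG_NO_HZ special-casing removed, rcu_irq_enter() and rcu_irq_exit() are always real functions; in this tree they are called from irq_enter()/irq_exit(), so an interrupt taken while a CPU is dyntick-idle still gets RCU's attention. A hedged sketch of a hypothetical architecture's interrupt entry path:

#include <linux/hardirq.h>
#include <linux/irq.h>

void my_arch_do_IRQ(unsigned int irq)		/* hypothetical arch entry point */
{
	irq_enter();			/* rcu_irq_enter() runs in here */
	generic_handle_irq(irq);	/* dispatch to the registered handler */
	irq_exit();			/* rcu_irq_exit() runs on the way out */
}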
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2cf4226ade7..81c04f4348e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
extern void rcutorture_record_test_transition(void);
extern void rcutorture_record_progress(unsigned long vernum);
+extern void do_trace_rcu_torture_read(char *rcutorturename,
+ struct rcu_head *rhp);
#else
static inline void rcutorture_record_test_transition(void)
{
@@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void)
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
+#ifdef CONFIG_RCU_TRACE
+extern void do_trace_rcu_torture_read(char *rcutorturename,
+ struct rcu_head *rhp);
+#else
+#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#endif
#endif
#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
@@ -177,23 +185,10 @@ extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;
-
-#ifdef CONFIG_NO_HZ
-
-extern void rcu_enter_nohz(void);
-extern void rcu_exit_nohz(void);
-
-#else /* #ifdef CONFIG_NO_HZ */
-
-static inline void rcu_enter_nohz(void)
-{
-}
-
-static inline void rcu_exit_nohz(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
+extern void rcu_idle_enter(void);
+extern void rcu_idle_exit(void);
+extern void rcu_irq_enter(void);
+extern void rcu_irq_exit(void);
/*
* Infrastructure to implement the synchronize_() primitives in
@@ -233,22 +228,30 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern struct lockdep_map rcu_lock_map;
-# define rcu_read_acquire() \
- lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#ifdef CONFIG_PROVE_RCU
+extern int rcu_is_cpu_idle(void);
+#else /* !CONFIG_PROVE_RCU */
+static inline int rcu_is_cpu_idle(void)
+{
+ return 0;
+}
+#endif /* else !CONFIG_PROVE_RCU */
-extern struct lockdep_map rcu_bh_lock_map;
-# define rcu_read_acquire_bh() \
- lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+static inline void rcu_lock_acquire(struct lockdep_map *map)
+{
+ WARN_ON_ONCE(rcu_is_cpu_idle());
+ lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
+}
-extern struct lockdep_map rcu_sched_lock_map;
-# define rcu_read_acquire_sched() \
- lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define rcu_read_release_sched() \
- lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+static inline void rcu_lock_release(struct lockdep_map *map)
+{
+ WARN_ON_ONCE(rcu_is_cpu_idle());
+ lock_release(map, 1, _THIS_IP_);
+}
+extern struct lockdep_map rcu_lock_map;
+extern struct lockdep_map rcu_bh_lock_map;
+extern struct lockdep_map rcu_sched_lock_map;
extern int debug_lockdep_rcu_enabled(void);
/**
@@ -262,11 +265,18 @@ extern int debug_lockdep_rcu_enabled(void);
*
* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context; for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
*/
static inline int rcu_read_lock_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
+ if (rcu_is_cpu_idle())
+ return 0;
return lock_is_held(&rcu_lock_map);
}
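As a usage sketch (struct foo, gp and reader() below are hypothetical names, not part of this patch): a reader whose rcu_dereference_check() condition includes rcu_read_lock_held(), with the lock and unlock kept in one context as the comment above requires:

#include <linux/rcupdate.h>

struct foo { int val; };
static struct foo __rcu *gp;			/* hypothetical RCU-protected pointer */

static void reader(void)
{
	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference_check(gp, rcu_read_lock_held());
	if (p)
		pr_debug("val=%d\n", p->val);
	rcu_read_unlock();		/* same context as the rcu_read_lock() above */
}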
@@ -290,6 +300,19 @@ extern int rcu_read_lock_bh_held(void);
*
* Check debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
+ *
+ * Note that if the CPU is in the idle loop from an RCU point of view
+ * (i.e., we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the
+ * CPU did an rcu_read_lock(). The reason for this is that RCU ignores
+ * CPUs that are in such a section, considering them to be in an
+ * extended quiescent state, so such a CPU is effectively never in an
+ * RCU read-side critical section regardless of what RCU primitives it
+ * invokes. This state of affairs is required: we need to keep an
+ * RCU-free window in idle where the CPU may possibly enter into low
+ * power mode. This way, CPUs that have started a grace period can
+ * notice our extended quiescent state; otherwise we would delay every
+ * grace period for as long as we run in the idle task.
*/
#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
@@ -298,6 +321,8 @@ static inline int rcu_read_lock_sched_held(void)
if (!debug_lockdep_rcu_enabled())
return 1;
+ if (rcu_is_cpu_idle())
+ return 0;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
@@ -311,12 +336,8 @@ static inline int rcu_read_lock_sched_held(void)
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-# define rcu_read_acquire() do { } while (0)
-# define rcu_read_release() do { } while (0)
-# define rcu_read_acquire_bh() do { } while (0)
-# define rcu_read_release_bh() do { } while (0)
-# define rcu_read_acquire_sched() do { } while (0)
-# define rcu_read_release_sched() do { } while (0)
+# define rcu_lock_acquire(a) do { } while (0)
+# define rcu_lock_release(a) do { } while (0)
static inline int rcu_read_lock_held(void)
{
@@ -637,7 +658,7 @@ static inline void rcu_read_lock(void)
{
__rcu_read_lock();
__acquire(RCU);
- rcu_read_acquire();
+ rcu_lock_acquire(&rcu_lock_map);
}
/*
@@ -657,7 +678,7 @@ static inline void rcu_read_lock(void)
*/
static inline void rcu_read_unlock(void)
{
- rcu_read_release();
+ rcu_lock_release(&rcu_lock_map);
__release(RCU);
__rcu_read_unlock();
}
@@ -673,12 +694,17 @@ static inline void rcu_read_unlock(void)
* critical sections in interrupt context can use just rcu_read_lock(),
* though this should at least be commented to avoid confusing people
* reading the code.
+ *
+ * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
+ * must occur in the same context; for example, it is illegal to invoke
+ * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
+ * was invoked from some other task.
*/
static inline void rcu_read_lock_bh(void)
{
local_bh_disable();
__acquire(RCU_BH);
- rcu_read_acquire_bh();
+ rcu_lock_acquire(&rcu_bh_lock_map);
}
/*
@@ -688,7 +714,7 @@ static inline void rcu_read_lock_bh(void)
*/
static inline void rcu_read_unlock_bh(void)
{
- rcu_read_release_bh();
+ rcu_lock_release(&rcu_bh_lock_map);
__release(RCU_BH);
local_bh_enable();
}
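A sketch of the local_bh_disable()-style reader described above (the list, its element type and bh_reader() are hypothetical); lock and unlock stay in the same task and context:

#include <linux/rculist.h>

struct ent { struct list_head node; int val; };
static LIST_HEAD(my_list);			/* hypothetical RCU-protected list */

static void bh_reader(void)			/* hypothetical softirq-context reader */
{
	struct ent *e;

	rcu_read_lock_bh();
	list_for_each_entry_rcu(e, &my_list, node)
		pr_debug("val=%d\n", e->val);
	rcu_read_unlock_bh();		/* paired in the same task and context */
}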
@@ -700,12 +726,17 @@ static inline void rcu_read_unlock_bh(void)
* are being done using call_rcu_sched() or synchronize_rcu_sched().
* Read-side critical sections can also be introduced by anything that
* disables preemption, including local_irq_disable() and friends.
+ *
+ * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
+ * must occur in the same context; for example, it is illegal to invoke
+ * rcu_read_unlock_sched() from process context if the matching
+ * rcu_read_lock_sched() was invoked from an NMI handler.
*/
static inline void rcu_read_lock_sched(void)
{
preempt_disable();
__acquire(RCU_SCHED);
- rcu_read_acquire_sched();
+ rcu_lock_acquire(&rcu_sched_lock_map);
}
/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -722,7 +753,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
*/
static inline void rcu_read_unlock_sched(void)
{
- rcu_read_release_sched();
+ rcu_lock_release(&rcu_sched_lock_map);
__release(RCU_SCHED);
preempt_enable();
}
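And the preemption-disabled flavour, again with hypothetical names (struct bar, gp2, sched_reader()); it pairs with updaters that use call_rcu_sched() or synchronize_sched():

#include <linux/rcupdate.h>

struct bar { int val; };
static struct bar __rcu *gp2;		/* hypothetical pointer updated via call_rcu_sched() */

static void sched_reader(void)		/* hypothetical preempt-disabled reader */
{
	struct bar *p;

	rcu_read_lock_sched();
	p = rcu_dereference_sched(gp2);
	if (p)
		pr_debug("val=%d\n", p->val);
	rcu_read_unlock_sched();	/* same context as the matching lock */
}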
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc..4a7e4d333a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2070,6 +2070,14 @@ extern int sched_setscheduler(struct task_struct *, int,
extern int sched_setscheduler_nocheck(struct task_struct *, int,
const struct sched_param *);
extern struct task_struct *idle_task(int cpu);
+/**
+ * is_idle_task - is the specified task an idle task?
+ * @p: the task in question.
+ */
+static inline bool is_idle_task(struct task_struct *p)
+{
+ return p->pid == 0;
+}
extern struct task_struct *curr_task(int cpu);
extern void set_curr_task(int cpu, struct task_struct *p);
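is_idle_task() gives callers a direct way to ask whether a task is one of the per-CPU idle tasks (PID 0). A hedged sketch of a hypothetical debug check built on it:

#include <linux/sched.h>

static void warn_if_idle(void)		/* hypothetical debug helper */
{
	/* RCU ignores read-side critical sections in the idle task,
	 * so complain if we are ever called from it. */
	WARN_ON_ONCE(is_idle_task(current));
}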
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 58971e891f4..e1b005918bb 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -28,6 +28,7 @@
#define _LINUX_SRCU_H
#include <linux/mutex.h>
+#include <linux/rcupdate.h>
struct srcu_struct_array {
int c[2];
@@ -60,18 +61,10 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
__init_srcu_struct((sp), #sp, &__srcu_key); \
})
-# define srcu_read_acquire(sp) \
- lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
-# define srcu_read_release(sp) \
- lock_release(&(sp)->dep_map, 1, _THIS_IP_)
-
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
int init_srcu_struct(struct srcu_struct *sp);
-# define srcu_read_acquire(sp) do { } while (0)
-# define srcu_read_release(sp) do { } while (0)
-
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
void cleanup_srcu_struct(struct srcu_struct *sp);
@@ -90,12 +83,32 @@ long srcu_batches_completed(struct srcu_struct *sp);
* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
* this assumes we are in an SRCU read-side critical section unless it can
* prove otherwise.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that if the CPU is in the idle loop from an RCU point of view
+ * (i.e., we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
+ * the CPU did an srcu_read_lock(). The reason for this is that RCU
+ * ignores CPUs that are in such a section, considering them to be in
+ * an extended quiescent state, so such a CPU is effectively never in
+ * an RCU read-side critical section regardless of what RCU primitives
+ * it invokes. This state of affairs is required: we need to keep an
+ * RCU-free window in idle where the CPU may possibly enter into low
+ * power mode. This way, CPUs that have started a grace period can
+ * notice our extended quiescent state; otherwise we would delay every
+ * grace period for as long as we run in the idle task.
*/
static inline int srcu_read_lock_held(struct srcu_struct *sp)
{
- if (debug_locks)
- return lock_is_held(&sp->dep_map);
- return 1;
+ if (rcu_is_cpu_idle())
+ return 0;
+
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+
+ return lock_is_held(&sp->dep_map);
}
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -145,12 +158,17 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
* one way to indirectly wait on an SRCU grace period is to acquire
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited().
+ *
+ * Note that srcu_read_lock() and the matching srcu_read_unlock() must
+ * occur in the same context; for example, it is illegal to invoke
+ * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
+ * was invoked in process context.
*/
static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
{
int retval = __srcu_read_lock(sp);
- srcu_read_acquire(sp);
+ rcu_lock_acquire(&(sp)->dep_map);
return retval;
}
@@ -164,8 +182,51 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
__releases(sp)
{
- srcu_read_release(sp);
+ rcu_lock_release(&(sp)->dep_map);
+ __srcu_read_unlock(sp, idx);
+}
+
+/**
+ * srcu_read_lock_raw - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section. Similar to srcu_read_lock(),
+ * but avoids the RCU-lockdep checking. This means that it is legal to
+ * use srcu_read_lock_raw() in one context, for example, in an exception
+ * handler, and then have the matching srcu_read_unlock_raw() in another
+ * context, for example in the task that took the exception.
+ *
+ * However, the entire SRCU read-side critical section must reside within a
+ * single task. For example, beware of using srcu_read_lock_raw() in
+ * a device interrupt handler and srcu_read_unlock_raw() in the interrupted
+ * task: This will not work if interrupts are threaded.
+ */
+static inline int srcu_read_lock_raw(struct srcu_struct *sp)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __srcu_read_lock(sp);
+ local_irq_restore(flags);
+ return ret;
+}
+
+/**
+ * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock_raw().
+ *
+ * Exit an SRCU read-side critical section without lockdep-RCU checking.
+ * See srcu_read_lock_raw() for more details.
+ */
+static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
__srcu_read_unlock(sp, idx);
+ local_irq_restore(flags);
}
#endif
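A sketch of the new raw SRCU primitives (my_srcu and the two wrapper functions are hypothetical, and my_srcu is assumed to have been set up with init_srcu_struct()); unlike srcu_read_lock(), the lock and unlock may live in different contexts, but the whole critical section must stay within a single task:

#include <linux/srcu.h>

static struct srcu_struct my_srcu;	/* hypothetical; init_srcu_struct(&my_srcu) at init time */

int enter_section(void)			/* e.g. called from an exception handler */
{
	return srcu_read_lock_raw(&my_srcu);	/* no lockdep-RCU checking */
}

void leave_section(int idx)		/* later, in the task that took the exception */
{
	srcu_read_unlock_raw(&my_srcu, idx);
}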
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee2..ab8be90b5cc 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -7,6 +7,7 @@
#define _LINUX_TICK_H
#include <linux/clockchips.h>
+#include <linux/irqflags.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -121,14 +122,16 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
# ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(int inidle);
-extern void tick_nohz_restart_sched_tick(void);
+extern void tick_nohz_idle_enter(void);
+extern void tick_nohz_idle_exit(void);
+extern void tick_nohz_irq_exit(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
# else
-static inline void tick_nohz_stop_sched_tick(int inidle) { }
-static inline void tick_nohz_restart_sched_tick(void) { }
+static inline void tick_nohz_idle_enter(void) { }
+static inline void tick_nohz_idle_exit(void) { }
+
static inline ktime_t tick_nohz_get_sleep_length(void)
{
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };