summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile11
-rw-r--r--kernel/cgroup.c14
-rw-r--r--kernel/events/core.c23
-rw-r--r--kernel/events/ring_buffer.c31
-rw-r--r--kernel/kmod.c4
-rw-r--r--kernel/lockdep.c4
-rw-r--r--kernel/mutex.c32
-rw-r--r--kernel/pid.c5
-rw-r--r--kernel/power/hibernate.c2
-rw-r--r--kernel/power/snapshot.c5
-rw-r--r--kernel/power/user.c8
-rw-r--r--kernel/rcu/Makefile6
-rw-r--r--kernel/rcu/rcu.h (renamed from kernel/rcu.h)7
-rw-r--r--kernel/rcu/srcu.c (renamed from kernel/srcu.c)0
-rw-r--r--kernel/rcu/tiny.c (renamed from kernel/rcutiny.c)37
-rw-r--r--kernel/rcu/tiny_plugin.h (renamed from kernel/rcutiny_plugin.h)0
-rw-r--r--kernel/rcu/torture.c (renamed from kernel/rcutorture.c)6
-rw-r--r--kernel/rcu/tree.c (renamed from kernel/rcutree.c)185
-rw-r--r--kernel/rcu/tree.h (renamed from kernel/rcutree.h)2
-rw-r--r--kernel/rcu/tree_plugin.h (renamed from kernel/rcutree_plugin.h)84
-rw-r--r--kernel/rcu/tree_trace.c (renamed from kernel/rcutree_trace.c)2
-rw-r--r--kernel/rcu/update.c (renamed from kernel/rcupdate.c)10
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/time/clockevents.c65
-rw-r--r--kernel/trace/trace_event_perf.c2
25 files changed, 389 insertions, 158 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 1ce47553fb0..f99d908b555 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -6,9 +6,9 @@ obj-y = fork.o exec_domain.o panic.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
- rcupdate.o extable.o params.o posix-timers.o \
+ extable.o params.o posix-timers.o \
kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
+ hrtimer.o rwsem.o nsproxy.o semaphore.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o groups.o lglock.o smpboot.o
@@ -27,6 +27,7 @@ obj-y += power/
obj-y += printk/
obj-y += cpu/
obj-y += irq/
+obj-y += rcu/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
@@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_SECCOMP) += seccomp.o
-obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
-obj-$(CONFIG_TREE_RCU) += rcutree.o
-obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
-obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
-obj-$(CONFIG_TINY_RCU) += rcutiny.o
-obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2418b6e71a8..8bd9cfdc70d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2039,7 +2039,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
/* @tsk either already exited or can't exit until the end */
if (tsk->flags & PF_EXITING)
- continue;
+ goto next;
/* as per above, nr_threads may decrease, but not increase. */
BUG_ON(i >= group_size);
@@ -2047,7 +2047,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
ent.cgrp = task_cgroup_from_root(tsk, root);
/* nothing to do if this task is already in the cgroup */
if (ent.cgrp == cgrp)
- continue;
+ goto next;
/*
* saying GFP_ATOMIC has no effect here because we did prealloc
* earlier, but it's good form to communicate our expectations.
@@ -2055,7 +2055,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
BUG_ON(retval != 0);
i++;
-
+ next:
if (!threadgroup)
break;
} while_each_thread(leader, tsk);
@@ -3188,11 +3188,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
WARN_ON_ONCE(!rcu_read_lock_held());
- /* if first iteration, visit the leftmost descendant */
- if (!pos) {
- next = css_leftmost_descendant(root);
- return next != root ? next : NULL;
- }
+ /* if first iteration, visit leftmost descendant which may be @root */
+ if (!pos)
+ return css_leftmost_descendant(root);
/* if we visited @root, we're done */
if (pos == root)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cb4238e85b3..663f43a20f7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6292,6 +6292,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
}
+static DEVICE_ATTR_RO(type);
static ssize_t
perf_event_mux_interval_ms_show(struct device *dev,
@@ -6336,17 +6337,19 @@ perf_event_mux_interval_ms_store(struct device *dev,
return count;
}
+static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
-static struct device_attribute pmu_dev_attrs[] = {
- __ATTR_RO(type),
- __ATTR_RW(perf_event_mux_interval_ms),
- __ATTR_NULL,
+static struct attribute *pmu_dev_attrs[] = {
+ &dev_attr_type.attr,
+ &dev_attr_perf_event_mux_interval_ms.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(pmu_dev);
static int pmu_bus_running;
static struct bus_type pmu_bus = {
.name = "event_source",
- .dev_attrs = pmu_dev_attrs,
+ .dev_groups = pmu_dev_groups,
};
static void pmu_dev_release(struct device *dev)
@@ -6767,6 +6770,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (ret)
return -EFAULT;
+ /* disabled for now */
+ if (attr->mmap2)
+ return -EINVAL;
+
if (attr->__reserved_1)
return -EINVAL;
@@ -7234,15 +7241,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
perf_remove_from_context(event);
unaccount_event_cpu(event, src_cpu);
put_ctx(src_ctx);
- list_add(&event->event_entry, &events);
+ list_add(&event->migrate_entry, &events);
}
mutex_unlock(&src_ctx->mutex);
synchronize_rcu();
mutex_lock(&dst_ctx->mutex);
- list_for_each_entry_safe(event, tmp, &events, event_entry) {
- list_del(&event->event_entry);
+ list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ list_del(&event->migrate_entry);
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
account_event_cpu(event, dst_cpu);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index cd55144270b..9c2ddfbf452 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -87,10 +87,31 @@ again:
goto out;
/*
- * Publish the known good head. Rely on the full barrier implied
- * by atomic_dec_and_test() order the rb->head read and this
- * write.
+ * Since the mmap() consumer (userspace) can run on a different CPU:
+ *
+ * kernel user
+ *
+ * READ ->data_tail READ ->data_head
+ * smp_mb() (A) smp_rmb() (C)
+ * WRITE $data READ $data
+ * smp_wmb() (B) smp_mb() (D)
+ * STORE ->data_head WRITE ->data_tail
+ *
+ * Where A pairs with D, and B pairs with C.
+ *
+ * I don't think A needs to be a full barrier because we won't in fact
+ * write data until we see the store from userspace. So we simply don't
+ * issue the data WRITE until we observe it. Be conservative for now.
+ *
+ * OTOH, D needs to be a full barrier since it separates the data READ
+ * from the tail WRITE.
+ *
+ * For B a WMB is sufficient since it separates two WRITEs, and for C
+ * an RMB is sufficient since it separates two READs.
+ *
+ * See perf_output_begin().
*/
+ smp_wmb();
rb->user_page->data_head = head;
/*
@@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle,
* Userspace could choose to issue a mb() before updating the
* tail pointer. So that all reads will be completed before the
* write is issued.
+ *
+ * See perf_output_put_handle().
*/
tail = ACCESS_ONCE(rb->user_page->data_tail);
- smp_rmb();
+ smp_mb();
offset = head = local_read(&rb->head);
head += size;
if (unlikely(!perf_output_space(rb, tail, offset, head)))
diff --git a/kernel/kmod.c b/kernel/kmod.c
index fb326365b69..b086006c59e 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
+ if (!sub_info->path) {
+ call_usermodehelper_freeinfo(sub_info);
+ return -EINVAL;
+ }
helper_lock();
if (!khelper_wq || usermodehelper_disabled) {
retval = -EBUSY;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e16c45b9ee7..4e8e14c34e4 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
!rcu_lockdep_current_cpu_online()
? "RCU used illegally from offline CPU!\n"
- : rcu_is_cpu_idle()
+ : !rcu_is_watching()
? "RCU used illegally from idle CPU!\n"
: "",
rcu_scheduler_active, debug_locks);
@@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
* So complain bitterly if someone does call rcu_read_lock(),
* rcu_read_lock_bh() and so on from extended quiescent states.
*/
- if (rcu_is_cpu_idle())
+ if (!rcu_is_watching())
printk("RCU used illegally from extended quiescent state!\n");
lockdep_print_held_locks(curr);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 6d647aedffe..d24105b1b79 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -410,7 +410,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
- struct ww_acquire_ctx *ww_ctx)
+ struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
@@ -450,7 +450,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct task_struct *owner;
struct mspin_node node;
- if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) {
+ if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
@@ -480,7 +480,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
if ((atomic_read(&lock->count) == 1) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
lock_acquired(&lock->dep_map, ip);
- if (!__builtin_constant_p(ww_ctx == NULL)) {
+ if (use_ww_ctx) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
@@ -551,7 +551,7 @@ slowpath:
goto err;
}
- if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) {
+ if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
@@ -575,7 +575,7 @@ skip_wait:
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);
- if (!__builtin_constant_p(ww_ctx == NULL)) {
+ if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct mutex_waiter *cur;
@@ -615,7 +615,7 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL);
+ subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -625,7 +625,7 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
might_sleep();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- 0, nest, _RET_IP_, NULL);
+ 0, nest, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
@@ -635,7 +635,7 @@ mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_KILLABLE,
- subclass, NULL, _RET_IP_, NULL);
+ subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
@@ -644,7 +644,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL);
+ subclass, NULL, _RET_IP_, NULL, 0);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -682,7 +682,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx);
+ 0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
@@ -697,7 +697,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
might_sleep();
ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx);
+ 0, &ctx->dep_map, _RET_IP_, ctx, 1);
if (!ret && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
@@ -809,28 +809,28 @@ __mutex_lock_slowpath(atomic_t *lock_count)
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL);
+ NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_KILLABLE, 0,
- NULL, _RET_IP_, NULL);
+ NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL);
+ NULL, _RET_IP_, NULL, 0);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx);
+ NULL, _RET_IP_, ctx, 1);
}
static noinline int __sched
@@ -838,7 +838,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx);
+ NULL, _RET_IP_, ctx, 1);
}
#endif
diff --git a/kernel/pid.c b/kernel/pid.c
index ebe5e80b10f..9b9a2669814 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -273,6 +273,11 @@ void free_pid(struct pid *pid)
*/
wake_up_process(ns->child_reaper);
break;
+ case PIDNS_HASH_ADDING:
+ /* Handle a fork failure of the first process */
+ WARN_ON(ns->child_reaper);
+ ns->nr_hashed = 0;
+ /* fall through */
case 0:
schedule_work(&ns->proc_work);
break;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c9c759d5a15..0121dab83f4 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -846,7 +846,7 @@ static int software_resume(void)
goto Finish;
}
-late_initcall(software_resume);
+late_initcall_sync(software_resume);
static const char * const hibernation_modes[] = {
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 358a146fd4d..98c3b34a4cf 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -743,7 +743,10 @@ int create_basic_memory_bitmaps(void)
struct memory_bitmap *bm1, *bm2;
int error = 0;
- BUG_ON(forbidden_pages_map || free_pages_map);
+ if (forbidden_pages_map && free_pages_map)
+ return 0;
+ else
+ BUG_ON(forbidden_pages_map || free_pages_map);
bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
if (!bm1)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 72e8f4fd616..957f06164ad 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -39,6 +39,7 @@ static struct snapshot_data {
char frozen;
char ready;
char platform_support;
+ bool free_bitmaps;
} snapshot_state;
atomic_t snapshot_device_available = ATOMIC_INIT(1);
@@ -82,6 +83,10 @@ static int snapshot_open(struct inode *inode, struct file *filp)
data->swap = -1;
data->mode = O_WRONLY;
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (!error) {
+ error = create_basic_memory_bitmaps();
+ data->free_bitmaps = !error;
+ }
if (error)
pm_notifier_call_chain(PM_POST_RESTORE);
}
@@ -111,6 +116,8 @@ static int snapshot_release(struct inode *inode, struct file *filp)
pm_restore_gfp_mask();
free_basic_memory_bitmaps();
thaw_processes();
+ } else if (data->free_bitmaps) {
+ free_basic_memory_bitmaps();
}
pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
@@ -231,6 +238,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
pm_restore_gfp_mask();
free_basic_memory_bitmaps();
+ data->free_bitmaps = false;
thaw_processes();
data->frozen = 0;
break;
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
new file mode 100644
index 00000000000..01e9ec37a3e
--- /dev/null
+++ b/kernel/rcu/Makefile
@@ -0,0 +1,6 @@
+obj-y += update.o srcu.o
+obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o
+obj-$(CONFIG_TREE_RCU) += tree.o
+obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o
+obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
+obj-$(CONFIG_TINY_RCU) += tiny.o
diff --git a/kernel/rcu.h b/kernel/rcu/rcu.h
index 77131966c4a..7859a0a3951 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -122,4 +122,11 @@ int rcu_jiffies_till_stall_check(void);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+/*
+ * Strings used in tracepoints need to be exported via the
+ * tracing system such that tools like perf and trace-cmd can
+ * translate the string address pointers to actual text.
+ */
+#define TPS(x) tracepoint_string(x)
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/srcu.c b/kernel/rcu/srcu.c
index 01d5ccb8bfe..01d5ccb8bfe 100644
--- a/kernel/srcu.c
+++ b/kernel/rcu/srcu.c
diff --git a/kernel/rcutiny.c b/kernel/rcu/tiny.c
index 9ed6075dc56..0c9a934cfec 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcu/tiny.c
@@ -35,6 +35,7 @@
#include <linux/time.h>
#include <linux/cpu.h>
#include <linux/prefetch.h>
+#include <linux/ftrace_event.h>
#ifdef CONFIG_RCU_TRACE
#include <trace/events/rcu.h>
@@ -42,7 +43,7 @@
#include "rcu.h"
-/* Forward declarations for rcutiny_plugin.h. */
+/* Forward declarations for tiny_plugin.h. */
struct rcu_ctrlblk;
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static void rcu_process_callbacks(struct softirq_action *unused);
@@ -52,22 +53,23 @@ static void __call_rcu(struct rcu_head *head,
static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-#include "rcutiny_plugin.h"
+#include "tiny_plugin.h"
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
static void rcu_idle_enter_common(long long newval)
{
if (newval) {
- RCU_TRACE(trace_rcu_dyntick("--=",
+ RCU_TRACE(trace_rcu_dyntick(TPS("--="),
rcu_dynticks_nesting, newval));
rcu_dynticks_nesting = newval;
return;
}
- RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval));
+ RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
+ rcu_dynticks_nesting, newval));
if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
+ struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
- RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
+ RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
rcu_dynticks_nesting, newval));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -120,15 +122,15 @@ EXPORT_SYMBOL_GPL(rcu_irq_exit);
static void rcu_idle_exit_common(long long oldval)
{
if (oldval) {
- RCU_TRACE(trace_rcu_dyntick("++=",
+ RCU_TRACE(trace_rcu_dyntick(TPS("++="),
oldval, rcu_dynticks_nesting));
return;
}
- RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
+ RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
+ struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
- RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
+ RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
oldval, rcu_dynticks_nesting));
ftrace_dump(DUMP_ALL);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@ -174,18 +176,18 @@ void rcu_irq_enter(void)
}
EXPORT_SYMBOL_GPL(rcu_irq_enter);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
/*
* Test whether RCU thinks that the current CPU is idle.
*/
-int rcu_is_cpu_idle(void)
+bool __rcu_is_watching(void)
{
- return !rcu_dynticks_nesting;
+ return rcu_dynticks_nesting;
}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
+EXPORT_SYMBOL(__rcu_is_watching);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
/*
* Test whether the current CPU was interrupted from idle. Nested
@@ -273,7 +275,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
if (&rcp->rcucblist == rcp->donetail) {
RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
- ACCESS_ONCE(rcp->rcucblist),
+ !!ACCESS_ONCE(rcp->rcucblist),
need_resched(),
is_idle_task(current),
false));
@@ -304,7 +306,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
RCU_TRACE(cb_count++);
}
RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
- RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
+ RCU_TRACE(trace_rcu_batch_end(rcp->name,
+ cb_count, 0, need_resched(),
is_idle_task(current),
false));
}
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 280d06cae35..280d06cae35 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
diff --git a/kernel/rcutorture.c b/kernel/rcu/torture.c
index be63101c617..3929cd45151 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcu/torture.c
@@ -52,6 +52,12 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>");
+MODULE_ALIAS("rcutorture");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcutorture."
+
static int fqs_duration;
module_param(fqs_duration, int, 0444);
MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable");
diff --git a/kernel/rcutree.c b/kernel/rcu/tree.c
index 32618b3fe4e..8a2c81e86dd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcu/tree.c
@@ -41,6 +41,7 @@
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
+#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
@@ -56,17 +57,16 @@
#include <linux/ftrace_event.h>
#include <linux/suspend.h>
-#include "rcutree.h"
+#include "tree.h"
#include <trace/events/rcu.h>
#include "rcu.h"
-/*
- * Strings used in tracepoints need to be exported via the
- * tracing system such that tools like perf and trace-cmd can
- * translate the string address pointers to actual text.
- */
-#define TPS(x) tracepoint_string(x)
+MODULE_ALIAS("rcutree");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcutree."
/* Data structures. */
@@ -222,7 +222,7 @@ void rcu_note_context_switch(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
-DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(1),
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
@@ -371,7 +371,8 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
{
trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
+ struct task_struct *idle __maybe_unused =
+ idle_task(smp_processor_id());
trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
ftrace_dump(DUMP_ORIG);
@@ -407,7 +408,7 @@ static void rcu_eqs_enter(bool user)
long long oldval;
struct rcu_dynticks *rdtp;
- rdtp = &__get_cpu_var(rcu_dynticks);
+ rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
@@ -435,7 +436,7 @@ void rcu_idle_enter(void)
local_irq_save(flags);
rcu_eqs_enter(false);
- rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
+ rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -478,7 +479,7 @@ void rcu_irq_exit(void)
struct rcu_dynticks *rdtp;
local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
+ rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
@@ -508,7 +509,8 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
rcu_cleanup_after_idle(smp_processor_id());
trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
+ struct task_struct *idle __maybe_unused =
+ idle_task(smp_processor_id());
trace_rcu_dyntick(TPS("Error on exit: not idle task"),
oldval, rdtp->dynticks_nesting);
@@ -528,7 +530,7 @@ static void rcu_eqs_exit(bool user)
struct rcu_dynticks *rdtp;
long long oldval;
- rdtp = &__get_cpu_var(rcu_dynticks);
+ rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE(oldval < 0);
if (oldval & DYNTICK_TASK_NEST_MASK)
@@ -555,7 +557,7 @@ void rcu_idle_exit(void)
local_irq_save(flags);
rcu_eqs_exit(false);
- rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
+ rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -599,7 +601,7 @@ void rcu_irq_enter(void)
long long oldval;
local_irq_save(flags);
- rdtp = &__get_cpu_var(rcu_dynticks);
+ rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
@@ -620,7 +622,7 @@ void rcu_irq_enter(void)
*/
void rcu_nmi_enter(void)
{
- struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+ struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
if (rdtp->dynticks_nmi_nesting == 0 &&
(atomic_read(&rdtp->dynticks) & 0x1))
@@ -642,7 +644,7 @@ void rcu_nmi_enter(void)
*/
void rcu_nmi_exit(void)
{
- struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+ struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
if (rdtp->dynticks_nmi_nesting == 0 ||
--rdtp->dynticks_nmi_nesting != 0)
@@ -655,21 +657,34 @@ void rcu_nmi_exit(void)
}
/**
- * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
+ * __rcu_is_watching - are RCU read-side critical sections safe?
+ *
+ * Return true if RCU is watching the running CPU, which means that
+ * this CPU can safely enter RCU read-side critical sections. Unlike
+ * rcu_is_watching(), the caller of __rcu_is_watching() must have at
+ * least disabled preemption.
+ */
+bool __rcu_is_watching(void)
+{
+ return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1;
+}
+
+/**
+ * rcu_is_watching - see if RCU thinks that the current CPU is idle
*
* If the current CPU is in its idle loop and is neither in an interrupt
* or NMI handler, return true.
*/
-int rcu_is_cpu_idle(void)
+bool rcu_is_watching(void)
{
int ret;
preempt_disable();
- ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
+ ret = __rcu_is_watching();
preempt_enable();
return ret;
}
-EXPORT_SYMBOL(rcu_is_cpu_idle);
+EXPORT_SYMBOL_GPL(rcu_is_watching);
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
@@ -703,7 +718,7 @@ bool rcu_lockdep_current_cpu_online(void)
if (in_nmi())
return 1;
preempt_disable();
- rdp = &__get_cpu_var(rcu_sched_data);
+ rdp = this_cpu_ptr(&rcu_sched_data);
rnp = rdp->mynode;
ret = (rdp->grpmask & rnp->qsmaskinit) ||
!rcu_scheduler_fully_active;
@@ -723,7 +738,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
- return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
+ return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
}
/*
@@ -802,8 +817,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
- rsp->gp_start = jiffies;
- rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+ unsigned long j = ACCESS_ONCE(jiffies);
+
+ rsp->gp_start = j;
+ smp_wmb(); /* Record start time before stall time. */
+ rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
}
/*
@@ -932,17 +950,48 @@ static void print_cpu_stall(struct rcu_state *rsp)
static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
+ unsigned long completed;
+ unsigned long gpnum;
+ unsigned long gps;
unsigned long j;
unsigned long js;
struct rcu_node *rnp;
- if (rcu_cpu_stall_suppress)
+ if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
return;
j = ACCESS_ONCE(jiffies);
+
+ /*
+ * Lots of memory barriers to reject false positives.
+ *
+ * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
+ * then rsp->gp_start, and finally rsp->completed. These values
+ * are updated in the opposite order with memory barriers (or
+ * equivalent) during grace-period initialization and cleanup.
+ * Now, a false positive can occur if we get an new value of
+ * rsp->gp_start and a old value of rsp->jiffies_stall. But given
+ * the memory barriers, the only way that this can happen is if one
+ * grace period ends and another starts between these two fetches.
+ * Detect this by comparing rsp->completed with the previous fetch
+ * from rsp->gpnum.
+ *
+ * Given this check, comparisons of jiffies, rsp->jiffies_stall,
+ * and rsp->gp_start suffice to forestall false positives.
+ */
+ gpnum = ACCESS_ONCE(rsp->gpnum);
+ smp_rmb(); /* Pick up ->gpnum first... */
js = ACCESS_ONCE(rsp->jiffies_stall);
+ smp_rmb(); /* ...then ->jiffies_stall before the rest... */
+ gps = ACCESS_ONCE(rsp->gp_start);
+ smp_rmb(); /* ...and finally ->gp_start before ->completed. */
+ completed = ACCESS_ONCE(rsp->completed);
+ if (ULONG_CMP_GE(completed, gpnum) ||
+ ULONG_CMP_LT(j, js) ||
+ ULONG_CMP_GE(gps, js))
+ return; /* No stall or GP completed since entering function. */
rnp = rdp->mynode;
if (rcu_gp_in_progress(rsp) &&
- (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
+ (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rsp);
@@ -1297,7 +1346,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
- * Initialize a new grace period.
+ * Initialize a new grace period. Return 0 if no grace period required.
*/
static int rcu_gp_init(struct rcu_state *rsp)
{
@@ -1306,18 +1355,27 @@ static int rcu_gp_init(struct rcu_state *rsp)
rcu_bind_gp_kthread();
raw_spin_lock_irq(&rnp->lock);
+ if (rsp->gp_flags == 0) {
+ /* Spurious wakeup, tell caller to go back to sleep. */
+ raw_spin_unlock_irq(&rnp->lock);
+ return 0;
+ }
rsp->gp_flags = 0; /* Clear all flags: New grace period. */
- if (rcu_gp_in_progress(rsp)) {
- /* Grace period already in progress, don't start another. */
+ if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
+ /*
+ * Grace period already in progress, don't start another.
+ * Not supposed to be able to happen.
+ */
raw_spin_unlock_irq(&rnp->lock);
return 0;
}
/* Advance to a new grace period and initialize state. */
+ record_gp_stall_check_time(rsp);
+ smp_wmb(); /* Record GP times before starting GP. */
rsp->gpnum++;
trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
- record_gp_stall_check_time(rsp);
raw_spin_unlock_irq(&rnp->lock);
/* Exclude any concurrent CPU-hotplug operations. */
@@ -1366,7 +1424,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
/*
* Do one round of quiescent-state forcing.
*/
-int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
+static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
{
int fqs_state = fqs_state_in;
bool isidle = false;
@@ -1451,8 +1509,12 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
- if (cpu_needs_another_gp(rsp, rdp))
- rsp->gp_flags = 1;
+ if (cpu_needs_another_gp(rsp, rdp)) {
+ rsp->gp_flags = RCU_GP_FLAG_INIT;
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("newreq"));
+ }
raw_spin_unlock_irq(&rnp->lock);
}
@@ -1462,6 +1524,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
static int __noreturn rcu_gp_kthread(void *arg)
{
int fqs_state;
+ int gf;
unsigned long j;
int ret;
struct rcu_state *rsp = arg;
@@ -1471,14 +1534,19 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Handle grace-period start. */
for (;;) {
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("reqwait"));
wait_event_interruptible(rsp->gp_wq,
- rsp->gp_flags &
+ ACCESS_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
- if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
- rcu_gp_init(rsp))
+ if (rcu_gp_init(rsp))
break;
cond_resched();
flush_signals(current);
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("reqwaitsig"));
}
/* Handle quiescent-state forcing. */
@@ -1488,10 +1556,16 @@ static int __noreturn rcu_gp_kthread(void *arg)
j = HZ;
jiffies_till_first_fqs = HZ;
}
+ ret = 0;
for (;;) {
- rsp->jiffies_force_qs = jiffies + j;
+ if (!ret)
+ rsp->jiffies_force_qs = jiffies + j;
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("fqswait"));
ret = wait_event_interruptible_timeout(rsp->gp_wq,
- (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
+ ((gf = ACCESS_ONCE(rsp->gp_flags)) &
+ RCU_GP_FLAG_FQS) ||
(!ACCESS_ONCE(rnp->qsmask) &&
!rcu_preempt_blocked_readers_cgp(rnp)),
j);
@@ -1500,13 +1574,23 @@ static int __noreturn rcu_gp_kthread(void *arg)
!rcu_preempt_blocked_readers_cgp(rnp))
break;
/* If time for quiescent-state forcing, do it. */
- if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) {
+ if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
+ (gf & RCU_GP_FLAG_FQS)) {
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("fqsstart"));
fqs_state = rcu_gp_fqs(rsp, fqs_state);
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("fqsend"));
cond_resched();
} else {
/* Deal with stray signal. */
cond_resched();
flush_signals(current);
+ trace_rcu_grace_period(rsp->name,
+ ACCESS_ONCE(rsp->gpnum),
+ TPS("fqswaitsig"));
}
j = jiffies_till_next_fqs;
if (j > HZ) {
@@ -1554,6 +1638,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
return;
}
rsp->gp_flags = RCU_GP_FLAG_INIT;
+ trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
+ TPS("newreq"));
/*
* We can't do wakeups while holding the rnp->lock, as that
@@ -2255,7 +2341,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
* If called from an extended quiescent state, invoke the RCU
* core in order to force a re-evaluation of RCU's idleness.
*/
- if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
+ if (!rcu_is_watching() && cpu_online(smp_processor_id()))
invoke_rcu_core();
/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
@@ -2725,10 +2811,13 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
for_each_rcu_flavor(rsp) {
rdp = per_cpu_ptr(rsp->rda, cpu);
- if (rdp->qlen != rdp->qlen_lazy)
+ if (!rdp->nxtlist)
+ continue;
+ hc = true;
+ if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
al = false;
- if (rdp->nxtlist)
- hc = true;
+ break;
+ }
}
if (all_lazy)
*all_lazy = al;
@@ -3216,7 +3305,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
/*
* Compute the rcu_node tree geometry from kernel parameters. This cannot
- * replace the definitions in rcutree.h because those are needed to size
+ * replace the definitions in tree.h because those are needed to size
* the ->node array in the rcu_state structure.
*/
static void __init rcu_init_geometry(void)
@@ -3295,8 +3384,8 @@ void __init rcu_init(void)
rcu_bootup_announce();
rcu_init_geometry();
- rcu_init_one(&rcu_sched_state, &rcu_sched_data);
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
+ rcu_init_one(&rcu_sched_state, &rcu_sched_data);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
@@ -3311,4 +3400,4 @@ void __init rcu_init(void)
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}
-#include "rcutree_plugin.h"
+#include "tree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcu/tree.h
index 5f97eab602c..52be957c9fe 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcu/tree.h
@@ -104,6 +104,8 @@ struct rcu_dynticks {
/* idle-period nonlazy_posted snapshot. */
unsigned long last_accelerate;
/* Last jiffy CBs were accelerated. */
+ unsigned long last_advance_all;
+ /* Last jiffy CBs were all advanced. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
diff --git a/kernel/rcutree_plugin.h b/kernel/rcu/tree_plugin.h
index 130c97b027f..3822ac0c4b2 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -28,7 +28,7 @@
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
-#include "time/tick-internal.h"
+#include "../time/tick-internal.h"
#define RCU_KTHREAD_PRIO 1
@@ -96,10 +96,15 @@ static void __init rcu_bootup_announce_oddness(void)
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
#ifdef CONFIG_RCU_NOCB_CPU_ALL
pr_info("\tOffload RCU callbacks from all CPUs\n");
- cpumask_setall(rcu_nocb_mask);
+ cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
if (have_rcu_nocb_mask) {
+ if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
+ pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+ cpumask_and(rcu_nocb_mask, cpu_possible_mask,
+ rcu_nocb_mask);
+ }
cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
if (rcu_nocb_poll)
@@ -660,7 +665,7 @@ static void rcu_preempt_check_callbacks(int cpu)
static void rcu_preempt_do_callbacks(void)
{
- rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
+ rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
}
#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -1128,7 +1133,7 @@ void exit_rcu(void)
#ifdef CONFIG_RCU_BOOST
-#include "rtmutex_common.h"
+#include "../rtmutex_common.h"
#ifdef CONFIG_RCU_TRACE
@@ -1332,7 +1337,7 @@ static void invoke_rcu_callbacks_kthread(void)
*/
static bool rcu_is_callbacks_kthread(void)
{
- return __get_cpu_var(rcu_cpu_kthread_task) == current;
+ return __this_cpu_read(rcu_cpu_kthread_task) == current;
}
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
@@ -1382,8 +1387,8 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
static void rcu_kthread_do_work(void)
{
- rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
- rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+ rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
+ rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
rcu_preempt_do_callbacks();
}
@@ -1402,7 +1407,7 @@ static void rcu_cpu_kthread_park(unsigned int cpu)
static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
- return __get_cpu_var(rcu_cpu_has_work);
+ return __this_cpu_read(rcu_cpu_has_work);
}
/*
@@ -1412,8 +1417,8 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu)
*/
static void rcu_cpu_kthread(unsigned int cpu)
{
- unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
- char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
+ unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
+ char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
int spincnt;
for (spincnt = 0; spincnt < 10; spincnt++) {
@@ -1630,17 +1635,23 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644);
extern int tick_nohz_enabled;
/*
- * Try to advance callbacks for all flavors of RCU on the current CPU.
- * Afterwards, if there are any callbacks ready for immediate invocation,
- * return true.
+ * Try to advance callbacks for all flavors of RCU on the current CPU, but
+ * only if it has been awhile since the last time we did so. Afterwards,
+ * if there are any callbacks ready for immediate invocation, return true.
*/
static bool rcu_try_advance_all_cbs(void)
{
bool cbs_ready = false;
struct rcu_data *rdp;
+ struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
struct rcu_node *rnp;
struct rcu_state *rsp;
+ /* Exit early if we advanced recently. */
+ if (jiffies == rdtp->last_advance_all)
+ return 0;
+ rdtp->last_advance_all = jiffies;
+
for_each_rcu_flavor(rsp) {
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
@@ -1739,6 +1750,8 @@ static void rcu_prepare_for_idle(int cpu)
*/
if (rdtp->all_lazy &&
rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
+ rdtp->all_lazy = false;
+ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
invoke_rcu_core();
return;
}
@@ -1768,17 +1781,11 @@ static void rcu_prepare_for_idle(int cpu)
*/
static void rcu_cleanup_after_idle(int cpu)
{
- struct rcu_data *rdp;
- struct rcu_state *rsp;
if (rcu_is_nocb_cpu(cpu))
return;
- rcu_try_advance_all_cbs();
- for_each_rcu_flavor(rsp) {
- rdp = per_cpu_ptr(rsp->rda, cpu);
- if (cpu_has_callbacks_ready_to_invoke(rdp))
- invoke_rcu_core();
- }
+ if (rcu_try_advance_all_cbs())
+ invoke_rcu_core();
}
/*
@@ -2108,15 +2115,22 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
/* If we are not being polled and there is a kthread, awaken it ... */
t = ACCESS_ONCE(rdp->nocb_kthread);
- if (rcu_nocb_poll | !t)
+ if (rcu_nocb_poll || !t) {
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WakeNotPoll"));
return;
+ }
len = atomic_long_read(&rdp->nocb_q_count);
if (old_rhpp == &rdp->nocb_head) {
wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
rdp->qlen_last_fqs_check = 0;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
wake_up_process(t); /* ... or if many callbacks queued. */
rdp->qlen_last_fqs_check = LONG_MAX / 2;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
+ } else {
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
}
return;
}
@@ -2140,10 +2154,12 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
if (__is_kfree_rcu_offset((unsigned long)rhp->func))
trace_rcu_kfree_callback(rdp->rsp->name, rhp,
(unsigned long)rhp->func,
- rdp->qlen_lazy, rdp->qlen);
+ -atomic_long_read(&rdp->nocb_q_count_lazy),
+ -atomic_long_read(&rdp->nocb_q_count));
else
trace_rcu_callback(rdp->rsp->name, rhp,
- rdp->qlen_lazy, rdp->qlen);
+ -atomic_long_read(&rdp->nocb_q_count_lazy),
+ -atomic_long_read(&rdp->nocb_q_count));
return 1;
}
@@ -2221,6 +2237,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
static int rcu_nocb_kthread(void *arg)
{
int c, cl;
+ bool firsttime = 1;
struct rcu_head *list;
struct rcu_head *next;
struct rcu_head **tail;
@@ -2229,14 +2246,27 @@ static int rcu_nocb_kthread(void *arg)
/* Each pass through this loop invokes one batch of callbacks */
for (;;) {
/* If not polling, wait for next batch of callbacks. */
- if (!rcu_nocb_poll)
+ if (!rcu_nocb_poll) {
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("Sleep"));
wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+ } else if (firsttime) {
+ firsttime = 0;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("Poll"));
+ }
list = ACCESS_ONCE(rdp->nocb_head);
if (!list) {
+ if (!rcu_nocb_poll)
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WokeEmpty"));
schedule_timeout_interruptible(1);
flush_signals(current);
continue;
}
+ firsttime = 1;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WokeNonEmpty"));
/*
* Extract queued callbacks, update counts, and wait
@@ -2257,7 +2287,11 @@ static int rcu_nocb_kthread(void *arg)
next = list->next;
/* Wait for enqueuing to complete, if needed. */
while (next == NULL && &list->next != tail) {
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WaitQueue"));
schedule_timeout_interruptible(1);
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WokeQueue"));
next = list->next;
}
debug_rcu_head_unqueue(list);
diff --git a/kernel/rcutree_trace.c b/kernel/rcu/tree_trace.c
index cf6c1741293..3596797b7e4 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -44,7 +44,7 @@
#include <linux/seq_file.h>
#define RCU_TREE_NONCORE
-#include "rcutree.h"
+#include "tree.h"
static int r_open(struct inode *inode, struct file *file,
const struct seq_operations *op)
diff --git a/kernel/rcupdate.c b/kernel/rcu/update.c
index b02a339836b..6cb3dff89e2 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcu/update.c
@@ -53,6 +53,12 @@
#include "rcu.h"
+MODULE_ALIAS("rcupdate");
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "rcupdate."
+
module_param(rcu_expedited, int, 0);
#ifdef CONFIG_PREEMPT_RCU
@@ -148,7 +154,7 @@ int rcu_read_lock_bh_held(void)
{
if (!debug_lockdep_rcu_enabled())
return 1;
- if (rcu_is_cpu_idle())
+ if (!rcu_is_watching())
return 0;
if (!rcu_lockdep_current_cpu_online())
return 0;
@@ -298,7 +304,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#endif
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
-int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_suppress, int, 0644);
module_param(rcu_cpu_stall_timeout, int, 0644);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2f06f3c6a3..8b80f1bae21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -190,7 +190,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses it's own private copy */
-static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
+static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
static int sysrq_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *lenp,
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 38959c86678..662c5798a68 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -33,29 +33,64 @@ struct ce_unbind {
int res;
};
-/**
- * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
- * @latch: value to convert
- * @evt: pointer to clock event device descriptor
- *
- * Math helper, returns latch value converted to nanoseconds (bound checked)
- */
-u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
+static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt,
+ bool ismax)
{
u64 clc = (u64) latch << evt->shift;
+ u64 rnd;
if (unlikely(!evt->mult)) {
evt->mult = 1;
WARN_ON(1);
}
+ rnd = (u64) evt->mult - 1;
+
+ /*
+ * Upper bound sanity check. If the backwards conversion is
+ * not equal latch, we know that the above shift overflowed.
+ */
+ if ((clc >> evt->shift) != (u64)latch)
+ clc = ~0ULL;
+
+ /*
+ * Scaled math oddities:
+ *
+ * For mult <= (1 << shift) we can safely add mult - 1 to
+ * prevent integer rounding loss. So the backwards conversion
+ * from nsec to device ticks will be correct.
+ *
+ * For mult > (1 << shift), i.e. device frequency is > 1GHz we
+ * need to be careful. Adding mult - 1 will result in a value
+ * which when converted back to device ticks can be larger
+ * than latch by up to (mult - 1) >> shift. For the min_delta
+ * calculation we still want to apply this in order to stay
+ * above the minimum device ticks limit. For the upper limit
+ * we would end up with a latch value larger than the upper
+ * limit of the device, so we omit the add to stay below the
+ * device upper boundary.
+ *
+ * Also omit the add if it would overflow the u64 boundary.
+ */
+ if ((~0ULL - clc > rnd) &&
+ (!ismax || evt->mult <= (1U << evt->shift)))
+ clc += rnd;
do_div(clc, evt->mult);
- if (clc < 1000)
- clc = 1000;
- if (clc > KTIME_MAX)
- clc = KTIME_MAX;
- return clc;
+ /* Deltas less than 1usec are pointless noise */
+ return clc > 1000 ? clc : 1000;
+}
+
+/**
+ * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
+ * @latch: value to convert
+ * @evt: pointer to clock event device descriptor
+ *
+ * Math helper, returns latch value converted to nanoseconds (bound checked)
+ */
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
+{
+ return cev_delta2ns(latch, evt, false);
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
@@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq)
sec = 600;
clockevents_calc_mult_shift(dev, freq, sec);
- dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev);
- dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
+ dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false);
+ dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);
}
/**
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 80c36bcf66e..78e27e3b52a 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -26,7 +26,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
{
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event) &&
- perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+ perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
/* No tracing, just counting, so no obvious leak */