diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 11 | ||||
-rw-r--r-- | kernel/cgroup.c | 14 | ||||
-rw-r--r-- | kernel/events/core.c | 23 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 31 | ||||
-rw-r--r-- | kernel/kmod.c | 4 | ||||
-rw-r--r-- | kernel/lockdep.c | 4 | ||||
-rw-r--r-- | kernel/mutex.c | 32 | ||||
-rw-r--r-- | kernel/pid.c | 5 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 2 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 5 | ||||
-rw-r--r-- | kernel/power/user.c | 8 | ||||
-rw-r--r-- | kernel/rcu/Makefile | 6 | ||||
-rw-r--r-- | kernel/rcu/rcu.h (renamed from kernel/rcu.h) | 7 | ||||
-rw-r--r-- | kernel/rcu/srcu.c (renamed from kernel/srcu.c) | 0 | ||||
-rw-r--r-- | kernel/rcu/tiny.c (renamed from kernel/rcutiny.c) | 37 | ||||
-rw-r--r-- | kernel/rcu/tiny_plugin.h (renamed from kernel/rcutiny_plugin.h) | 0 | ||||
-rw-r--r-- | kernel/rcu/torture.c (renamed from kernel/rcutorture.c) | 6 | ||||
-rw-r--r-- | kernel/rcu/tree.c (renamed from kernel/rcutree.c) | 185 | ||||
-rw-r--r-- | kernel/rcu/tree.h (renamed from kernel/rcutree.h) | 2 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h (renamed from kernel/rcutree_plugin.h) | 84 | ||||
-rw-r--r-- | kernel/rcu/tree_trace.c (renamed from kernel/rcutree_trace.c) | 2 | ||||
-rw-r--r-- | kernel/rcu/update.c (renamed from kernel/rcupdate.c) | 10 | ||||
-rw-r--r-- | kernel/sysctl.c | 2 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 65 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 2 |
25 files changed, 389 insertions, 158 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 1ce47553fb0..f99d908b555 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -6,9 +6,9 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ - rcupdate.o extable.o params.o posix-timers.o \ + extable.o params.o posix-timers.o \ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -27,6 +27,7 @@ obj-y += power/ obj-y += printk/ obj-y += cpu/ obj-y += irq/ +obj-y += rcu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_SECCOMP) += seccomp.o -obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_TREE_RCU) += rcutree.o -obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_TINY_RCU) += rcutiny.o -obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2418b6e71a8..8bd9cfdc70d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2039,7 +2039,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, /* @tsk either already exited or can't exit until the end */ if (tsk->flags & PF_EXITING) - continue; + goto next; /* as per above, nr_threads may decrease, but not increase. */ BUG_ON(i >= group_size); @@ -2047,7 +2047,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, ent.cgrp = task_cgroup_from_root(tsk, root); /* nothing to do if this task is already in the cgroup */ if (ent.cgrp == cgrp) - continue; + goto next; /* * saying GFP_ATOMIC has no effect here because we did prealloc * earlier, but it's good form to communicate our expectations. @@ -2055,7 +2055,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; - + next: if (!threadgroup) break; } while_each_thread(leader, tsk); @@ -3188,11 +3188,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, WARN_ON_ONCE(!rcu_read_lock_held()); - /* if first iteration, visit the leftmost descendant */ - if (!pos) { - next = css_leftmost_descendant(root); - return next != root ? next : NULL; - } + /* if first iteration, visit leftmost descendant which may be @root */ + if (!pos) + return css_leftmost_descendant(root); /* if we visited @root, we're done */ if (pos == root) diff --git a/kernel/events/core.c b/kernel/events/core.c index cb4238e85b3..663f43a20f7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6292,6 +6292,7 @@ type_show(struct device *dev, struct device_attribute *attr, char *page) return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); } +static DEVICE_ATTR_RO(type); static ssize_t perf_event_mux_interval_ms_show(struct device *dev, @@ -6336,17 +6337,19 @@ perf_event_mux_interval_ms_store(struct device *dev, return count; } +static DEVICE_ATTR_RW(perf_event_mux_interval_ms); -static struct device_attribute pmu_dev_attrs[] = { - __ATTR_RO(type), - __ATTR_RW(perf_event_mux_interval_ms), - __ATTR_NULL, +static struct attribute *pmu_dev_attrs[] = { + &dev_attr_type.attr, + &dev_attr_perf_event_mux_interval_ms.attr, + NULL, }; +ATTRIBUTE_GROUPS(pmu_dev); static int pmu_bus_running; static struct bus_type pmu_bus = { .name = "event_source", - .dev_attrs = pmu_dev_attrs, + .dev_groups = pmu_dev_groups, }; static void pmu_dev_release(struct device *dev) @@ -6767,6 +6770,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (ret) return -EFAULT; + /* disabled for now */ + if (attr->mmap2) + return -EINVAL; + if (attr->__reserved_1) return -EINVAL; @@ -7234,15 +7241,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) perf_remove_from_context(event); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); - list_add(&event->event_entry, &events); + list_add(&event->migrate_entry, &events); } mutex_unlock(&src_ctx->mutex); synchronize_rcu(); mutex_lock(&dst_ctx->mutex); - list_for_each_entry_safe(event, tmp, &events, event_entry) { - list_del(&event->event_entry); + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; account_event_cpu(event, dst_cpu); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index cd55144270b..9c2ddfbf452 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -87,10 +87,31 @@ again: goto out; /* - * Publish the known good head. Rely on the full barrier implied - * by atomic_dec_and_test() order the rb->head read and this - * write. + * Since the mmap() consumer (userspace) can run on a different CPU: + * + * kernel user + * + * READ ->data_tail READ ->data_head + * smp_mb() (A) smp_rmb() (C) + * WRITE $data READ $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head WRITE ->data_tail + * + * Where A pairs with D, and B pairs with C. + * + * I don't think A needs to be a full barrier because we won't in fact + * write data until we see the store from userspace. So we simply don't + * issue the data WRITE until we observe it. Be conservative for now. + * + * OTOH, D needs to be a full barrier since it separates the data READ + * from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, and for C + * an RMB is sufficient since it separates two READs. + * + * See perf_output_begin(). */ + smp_wmb(); rb->user_page->data_head = head; /* @@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, * Userspace could choose to issue a mb() before updating the * tail pointer. So that all reads will be completed before the * write is issued. + * + * See perf_output_put_handle(). */ tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_rmb(); + smp_mb(); offset = head = local_read(&rb->head); head += size; if (unlikely(!perf_output_space(rb, tail, offset, head))) diff --git a/kernel/kmod.c b/kernel/kmod.c index fb326365b69..b086006c59e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) DECLARE_COMPLETION_ONSTACK(done); int retval = 0; + if (!sub_info->path) { + call_usermodehelper_freeinfo(sub_info); + return -EINVAL; + } helper_lock(); if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e16c45b9ee7..4e8e14c34e4 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" - : rcu_is_cpu_idle() + : !rcu_is_watching() ? "RCU used illegally from idle CPU!\n" : "", rcu_scheduler_active, debug_locks); @@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) * So complain bitterly if someone does call rcu_read_lock(), * rcu_read_lock_bh() and so on from extended quiescent states. */ - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) printk("RCU used illegally from extended quiescent state!\n"); lockdep_print_held_locks(curr); diff --git a/kernel/mutex.c b/kernel/mutex.c index 6d647aedffe..d24105b1b79 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -410,7 +410,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, static __always_inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -450,7 +450,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct task_struct *owner; struct mspin_node node; - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -480,7 +480,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if ((atomic_read(&lock->count) == 1) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -551,7 +551,7 @@ slowpath: goto err; } - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { ret = __mutex_lock_check_stamp(lock, ww_ctx); if (ret) goto err; @@ -575,7 +575,7 @@ skip_wait: lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct mutex_waiter *cur; @@ -615,7 +615,7 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -625,7 +625,7 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL); + 0, nest, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); @@ -635,7 +635,7 @@ mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); @@ -644,7 +644,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -682,7 +682,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -697,7 +697,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -809,28 +809,28 @@ __mutex_lock_slowpath(atomic_t *lock_count) struct mutex *lock = container_of(lock_count, struct mutex, count); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __mutex_lock_killable_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } static noinline int __sched @@ -838,7 +838,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } #endif diff --git a/kernel/pid.c b/kernel/pid.c index ebe5e80b10f..9b9a2669814 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -273,6 +273,11 @@ void free_pid(struct pid *pid) */ wake_up_process(ns->child_reaper); break; + case PIDNS_HASH_ADDING: + /* Handle a fork failure of the first process */ + WARN_ON(ns->child_reaper); + ns->nr_hashed = 0; + /* fall through */ case 0: schedule_work(&ns->proc_work); break; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c9c759d5a15..0121dab83f4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -846,7 +846,7 @@ static int software_resume(void) goto Finish; } -late_initcall(software_resume); +late_initcall_sync(software_resume); static const char * const hibernation_modes[] = { diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 358a146fd4d..98c3b34a4cf 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -743,7 +743,10 @@ int create_basic_memory_bitmaps(void) struct memory_bitmap *bm1, *bm2; int error = 0; - BUG_ON(forbidden_pages_map || free_pages_map); + if (forbidden_pages_map && free_pages_map) + return 0; + else + BUG_ON(forbidden_pages_map || free_pages_map); bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); if (!bm1) diff --git a/kernel/power/user.c b/kernel/power/user.c index 72e8f4fd616..957f06164ad 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -39,6 +39,7 @@ static struct snapshot_data { char frozen; char ready; char platform_support; + bool free_bitmaps; } snapshot_state; atomic_t snapshot_device_available = ATOMIC_INIT(1); @@ -82,6 +83,10 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + if (!error) { + error = create_basic_memory_bitmaps(); + data->free_bitmaps = !error; + } if (error) pm_notifier_call_chain(PM_POST_RESTORE); } @@ -111,6 +116,8 @@ static int snapshot_release(struct inode *inode, struct file *filp) pm_restore_gfp_mask(); free_basic_memory_bitmaps(); thaw_processes(); + } else if (data->free_bitmaps) { + free_basic_memory_bitmaps(); } pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); @@ -231,6 +238,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; pm_restore_gfp_mask(); free_basic_memory_bitmaps(); + data->free_bitmaps = false; thaw_processes(); data->frozen = 0; break; diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile new file mode 100644 index 00000000000..01e9ec37a3e --- /dev/null +++ b/kernel/rcu/Makefile @@ -0,0 +1,6 @@ +obj-y += update.o srcu.o +obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o +obj-$(CONFIG_TREE_RCU) += tree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o +obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o +obj-$(CONFIG_TINY_RCU) += tiny.o diff --git a/kernel/rcu.h b/kernel/rcu/rcu.h index 77131966c4a..7859a0a3951 100644 --- a/kernel/rcu.h +++ b/kernel/rcu/rcu.h @@ -122,4 +122,11 @@ int rcu_jiffies_till_stall_check(void); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. + */ +#define TPS(x) tracepoint_string(x) + #endif /* __LINUX_RCU_H */ diff --git a/kernel/srcu.c b/kernel/rcu/srcu.c index 01d5ccb8bfe..01d5ccb8bfe 100644 --- a/kernel/srcu.c +++ b/kernel/rcu/srcu.c diff --git a/kernel/rcutiny.c b/kernel/rcu/tiny.c index 9ed6075dc56..0c9a934cfec 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcu/tiny.c @@ -35,6 +35,7 @@ #include <linux/time.h> #include <linux/cpu.h> #include <linux/prefetch.h> +#include <linux/ftrace_event.h> #ifdef CONFIG_RCU_TRACE #include <trace/events/rcu.h> @@ -42,7 +43,7 @@ #include "rcu.h" -/* Forward declarations for rcutiny_plugin.h. */ +/* Forward declarations for tiny_plugin.h. */ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); @@ -52,22 +53,23 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; -#include "rcutiny_plugin.h" +#include "tiny_plugin.h" /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long newval) { if (newval) { - RCU_TRACE(trace_rcu_dyntick("--=", + RCU_TRACE(trace_rcu_dyntick(TPS("--="), rcu_dynticks_nesting, newval)); rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); + RCU_TRACE(trace_rcu_dyntick(TPS("Start"), + rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -120,15 +122,15 @@ EXPORT_SYMBOL_GPL(rcu_irq_exit); static void rcu_idle_exit_common(long long oldval) { if (oldval) { - RCU_TRACE(trace_rcu_dyntick("++=", + RCU_TRACE(trace_rcu_dyntick(TPS("++="), oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -174,18 +176,18 @@ void rcu_irq_enter(void) } EXPORT_SYMBOL_GPL(rcu_irq_enter); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* * Test whether RCU thinks that the current CPU is idle. */ -int rcu_is_cpu_idle(void) +bool __rcu_is_watching(void) { - return !rcu_dynticks_nesting; + return rcu_dynticks_nesting; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL(__rcu_is_watching); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ /* * Test whether the current CPU was interrupted from idle. Nested @@ -273,7 +275,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) if (&rcp->rcucblist == rcp->donetail) { RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, - ACCESS_ONCE(rcp->rcucblist), + !!ACCESS_ONCE(rcp->rcucblist), need_resched(), is_idle_task(current), false)); @@ -304,7 +306,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), + RCU_TRACE(trace_rcu_batch_end(rcp->name, + cb_count, 0, need_resched(), is_idle_task(current), false)); } diff --git a/kernel/rcutiny_plugin.h b/kernel/rcu/tiny_plugin.h index 280d06cae35..280d06cae35 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h diff --git a/kernel/rcutorture.c b/kernel/rcu/torture.c index be63101c617..3929cd45151 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcu/torture.c @@ -52,6 +52,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); +MODULE_ALIAS("rcutorture"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutorture." + static int fqs_duration; module_param(fqs_duration, int, 0444); MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); diff --git a/kernel/rcutree.c b/kernel/rcu/tree.c index 32618b3fe4e..8a2c81e86dd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcu/tree.c @@ -41,6 +41,7 @@ #include <linux/export.h> #include <linux/completion.h> #include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/cpu.h> @@ -56,17 +57,16 @@ #include <linux/ftrace_event.h> #include <linux/suspend.h> -#include "rcutree.h" +#include "tree.h" #include <trace/events/rcu.h> #include "rcu.h" -/* - * Strings used in tracepoints need to be exported via the - * tracing system such that tools like perf and trace-cmd can - * translate the string address pointers to actual text. - */ -#define TPS(x) tracepoint_string(x) +MODULE_ALIAS("rcutree"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutree." /* Data structures. */ @@ -222,7 +222,7 @@ void rcu_note_context_switch(int cpu) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_NO_HZ_FULL_SYSIDLE @@ -371,7 +371,8 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, { trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); ftrace_dump(DUMP_ORIG); @@ -407,7 +408,7 @@ static void rcu_eqs_enter(bool user) long long oldval; struct rcu_dynticks *rdtp; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -435,7 +436,7 @@ void rcu_idle_enter(void) local_irq_save(flags); rcu_eqs_enter(false); - rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -478,7 +479,7 @@ void rcu_irq_exit(void) struct rcu_dynticks *rdtp; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); @@ -508,7 +509,8 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), oldval, rdtp->dynticks_nesting); @@ -528,7 +530,7 @@ static void rcu_eqs_exit(bool user) struct rcu_dynticks *rdtp; long long oldval; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) @@ -555,7 +557,7 @@ void rcu_idle_exit(void) local_irq_save(flags); rcu_eqs_exit(false); - rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -599,7 +601,7 @@ void rcu_irq_enter(void) long long oldval; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; WARN_ON_ONCE(rdtp->dynticks_nesting == 0); @@ -620,7 +622,7 @@ void rcu_irq_enter(void) */ void rcu_nmi_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 && (atomic_read(&rdtp->dynticks) & 0x1)) @@ -642,7 +644,7 @@ void rcu_nmi_enter(void) */ void rcu_nmi_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 || --rdtp->dynticks_nmi_nesting != 0) @@ -655,21 +657,34 @@ void rcu_nmi_exit(void) } /** - * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle + * __rcu_is_watching - are RCU read-side critical sections safe? + * + * Return true if RCU is watching the running CPU, which means that + * this CPU can safely enter RCU read-side critical sections. Unlike + * rcu_is_watching(), the caller of __rcu_is_watching() must have at + * least disabled preemption. + */ +bool __rcu_is_watching(void) +{ + return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1; +} + +/** + * rcu_is_watching - see if RCU thinks that the current CPU is idle * * If the current CPU is in its idle loop and is neither in an interrupt * or NMI handler, return true. */ -int rcu_is_cpu_idle(void) +bool rcu_is_watching(void) { int ret; preempt_disable(); - ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; + ret = __rcu_is_watching(); preempt_enable(); return ret; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL_GPL(rcu_is_watching); #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) @@ -703,7 +718,7 @@ bool rcu_lockdep_current_cpu_online(void) if (in_nmi()) return 1; preempt_disable(); - rdp = &__get_cpu_var(rcu_sched_data); + rdp = this_cpu_ptr(&rcu_sched_data); rnp = rdp->mynode; ret = (rdp->grpmask & rnp->qsmaskinit) || !rcu_scheduler_fully_active; @@ -723,7 +738,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; } /* @@ -802,8 +817,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, static void record_gp_stall_check_time(struct rcu_state *rsp) { - rsp->gp_start = jiffies; - rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); + unsigned long j = ACCESS_ONCE(jiffies); + + rsp->gp_start = j; + smp_wmb(); /* Record start time before stall time. */ + rsp->jiffies_stall = j + rcu_jiffies_till_stall_check(); } /* @@ -932,17 +950,48 @@ static void print_cpu_stall(struct rcu_state *rsp) static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { + unsigned long completed; + unsigned long gpnum; + unsigned long gps; unsigned long j; unsigned long js; struct rcu_node *rnp; - if (rcu_cpu_stall_suppress) + if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) return; j = ACCESS_ONCE(jiffies); + + /* + * Lots of memory barriers to reject false positives. + * + * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall, + * then rsp->gp_start, and finally rsp->completed. These values + * are updated in the opposite order with memory barriers (or + * equivalent) during grace-period initialization and cleanup. + * Now, a false positive can occur if we get an new value of + * rsp->gp_start and a old value of rsp->jiffies_stall. But given + * the memory barriers, the only way that this can happen is if one + * grace period ends and another starts between these two fetches. + * Detect this by comparing rsp->completed with the previous fetch + * from rsp->gpnum. + * + * Given this check, comparisons of jiffies, rsp->jiffies_stall, + * and rsp->gp_start suffice to forestall false positives. + */ + gpnum = ACCESS_ONCE(rsp->gpnum); + smp_rmb(); /* Pick up ->gpnum first... */ js = ACCESS_ONCE(rsp->jiffies_stall); + smp_rmb(); /* ...then ->jiffies_stall before the rest... */ + gps = ACCESS_ONCE(rsp->gp_start); + smp_rmb(); /* ...and finally ->gp_start before ->completed. */ + completed = ACCESS_ONCE(rsp->completed); + if (ULONG_CMP_GE(completed, gpnum) || + ULONG_CMP_LT(j, js) || + ULONG_CMP_GE(gps, js)) + return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; if (rcu_gp_in_progress(rsp) && - (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -1297,7 +1346,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * Initialize a new grace period. + * Initialize a new grace period. Return 0 if no grace period required. */ static int rcu_gp_init(struct rcu_state *rsp) { @@ -1306,18 +1355,27 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); + if (rsp->gp_flags == 0) { + /* Spurious wakeup, tell caller to go back to sleep. */ + raw_spin_unlock_irq(&rnp->lock); + return 0; + } rsp->gp_flags = 0; /* Clear all flags: New grace period. */ - if (rcu_gp_in_progress(rsp)) { - /* Grace period already in progress, don't start another. */ + if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { + /* + * Grace period already in progress, don't start another. + * Not supposed to be able to happen. + */ raw_spin_unlock_irq(&rnp->lock); return 0; } /* Advance to a new grace period and initialize state. */ + record_gp_stall_check_time(rsp); + smp_wmb(); /* Record GP times before starting GP. */ rsp->gpnum++; trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); - record_gp_stall_check_time(rsp); raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ @@ -1366,7 +1424,7 @@ static int rcu_gp_init(struct rcu_state *rsp) /* * Do one round of quiescent-state forcing. */ -int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) +static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { int fqs_state = fqs_state_in; bool isidle = false; @@ -1451,8 +1509,12 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ - if (cpu_needs_another_gp(rsp, rdp)) - rsp->gp_flags = 1; + if (cpu_needs_another_gp(rsp, rdp)) { + rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); + } raw_spin_unlock_irq(&rnp->lock); } @@ -1462,6 +1524,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) static int __noreturn rcu_gp_kthread(void *arg) { int fqs_state; + int gf; unsigned long j; int ret; struct rcu_state *rsp = arg; @@ -1471,14 +1534,19 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle grace-period start. */ for (;;) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwait")); wait_event_interruptible(rsp->gp_wq, - rsp->gp_flags & + ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); - if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && - rcu_gp_init(rsp)) + if (rcu_gp_init(rsp)) break; cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwaitsig")); } /* Handle quiescent-state forcing. */ @@ -1488,10 +1556,16 @@ static int __noreturn rcu_gp_kthread(void *arg) j = HZ; jiffies_till_first_fqs = HZ; } + ret = 0; for (;;) { - rsp->jiffies_force_qs = jiffies + j; + if (!ret) + rsp->jiffies_force_qs = jiffies + j; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswait")); ret = wait_event_interruptible_timeout(rsp->gp_wq, - (rsp->gp_flags & RCU_GP_FLAG_FQS) || + ((gf = ACCESS_ONCE(rsp->gp_flags)) & + RCU_GP_FLAG_FQS) || (!ACCESS_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)), j); @@ -1500,13 +1574,23 @@ static int __noreturn rcu_gp_kthread(void *arg) !rcu_preempt_blocked_readers_cgp(rnp)) break; /* If time for quiescent-state forcing, do it. */ - if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { + if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || + (gf & RCU_GP_FLAG_FQS)) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsstart")); fqs_state = rcu_gp_fqs(rsp, fqs_state); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsend")); cond_resched(); } else { /* Deal with stray signal. */ cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswaitsig")); } j = jiffies_till_next_fqs; if (j > HZ) { @@ -1554,6 +1638,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, return; } rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); /* * We can't do wakeups while holding the rnp->lock, as that @@ -2255,7 +2341,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) + if (!rcu_is_watching() && cpu_online(smp_processor_id())) invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ @@ -2725,10 +2811,13 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) for_each_rcu_flavor(rsp) { rdp = per_cpu_ptr(rsp->rda, cpu); - if (rdp->qlen != rdp->qlen_lazy) + if (!rdp->nxtlist) + continue; + hc = true; + if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { al = false; - if (rdp->nxtlist) - hc = true; + break; + } } if (all_lazy) *all_lazy = al; @@ -3216,7 +3305,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* * Compute the rcu_node tree geometry from kernel parameters. This cannot - * replace the definitions in rcutree.h because those are needed to size + * replace the definitions in tree.h because those are needed to size * the ->node array in the rcu_state structure. */ static void __init rcu_init_geometry(void) @@ -3295,8 +3384,8 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); - rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); + rcu_init_one(&rcu_sched_state, &rcu_sched_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); @@ -3311,4 +3400,4 @@ void __init rcu_init(void) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } -#include "rcutree_plugin.h" +#include "tree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcu/tree.h index 5f97eab602c..52be957c9fe 100644 --- a/kernel/rcutree.h +++ b/kernel/rcu/tree.h @@ -104,6 +104,8 @@ struct rcu_dynticks { /* idle-period nonlazy_posted snapshot. */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ + unsigned long last_advance_all; + /* Last jiffy CBs were all advanced. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcu/tree_plugin.h index 130c97b027f..3822ac0c4b2 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,7 +28,7 @@ #include <linux/gfp.h> #include <linux/oom.h> #include <linux/smpboot.h> -#include "time/tick-internal.h" +#include "../time/tick-internal.h" #define RCU_KTHREAD_PRIO 1 @@ -96,10 +96,15 @@ static void __init rcu_bootup_announce_oddness(void) #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ #ifdef CONFIG_RCU_NOCB_CPU_ALL pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_setall(rcu_nocb_mask); + cpumask_copy(rcu_nocb_mask, cpu_possible_mask); #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { + pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); + cpumask_and(rcu_nocb_mask, cpu_possible_mask, + rcu_nocb_mask); + } cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); if (rcu_nocb_poll) @@ -660,7 +665,7 @@ static void rcu_preempt_check_callbacks(int cpu) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); + rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -1128,7 +1133,7 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "rtmutex_common.h" +#include "../rtmutex_common.h" #ifdef CONFIG_RCU_TRACE @@ -1332,7 +1337,7 @@ static void invoke_rcu_callbacks_kthread(void) */ static bool rcu_is_callbacks_kthread(void) { - return __get_cpu_var(rcu_cpu_kthread_task) == current; + return __this_cpu_read(rcu_cpu_kthread_task) == current; } #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) @@ -1382,8 +1387,8 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, static void rcu_kthread_do_work(void) { - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); rcu_preempt_do_callbacks(); } @@ -1402,7 +1407,7 @@ static void rcu_cpu_kthread_park(unsigned int cpu) static int rcu_cpu_kthread_should_run(unsigned int cpu) { - return __get_cpu_var(rcu_cpu_has_work); + return __this_cpu_read(rcu_cpu_has_work); } /* @@ -1412,8 +1417,8 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) */ static void rcu_cpu_kthread(unsigned int cpu) { - unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); - char work, *workp = &__get_cpu_var(rcu_cpu_has_work); + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); int spincnt; for (spincnt = 0; spincnt < 10; spincnt++) { @@ -1630,17 +1635,23 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Try to advance callbacks for all flavors of RCU on the current CPU. - * Afterwards, if there are any callbacks ready for immediate invocation, - * return true. + * Try to advance callbacks for all flavors of RCU on the current CPU, but + * only if it has been awhile since the last time we did so. Afterwards, + * if there are any callbacks ready for immediate invocation, return true. */ static bool rcu_try_advance_all_cbs(void) { bool cbs_ready = false; struct rcu_data *rdp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); struct rcu_node *rnp; struct rcu_state *rsp; + /* Exit early if we advanced recently. */ + if (jiffies == rdtp->last_advance_all) + return 0; + rdtp->last_advance_all = jiffies; + for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); rnp = rdp->mynode; @@ -1739,6 +1750,8 @@ static void rcu_prepare_for_idle(int cpu) */ if (rdtp->all_lazy && rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + rdtp->all_lazy = false; + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; invoke_rcu_core(); return; } @@ -1768,17 +1781,11 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_cleanup_after_idle(int cpu) { - struct rcu_data *rdp; - struct rcu_state *rsp; if (rcu_is_nocb_cpu(cpu)) return; - rcu_try_advance_all_cbs(); - for_each_rcu_flavor(rsp) { - rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_core(); - } + if (rcu_try_advance_all_cbs()) + invoke_rcu_core(); } /* @@ -2108,15 +2115,22 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, /* If we are not being polled and there is a kthread, awaken it ... */ t = ACCESS_ONCE(rdp->nocb_kthread); - if (rcu_nocb_poll | !t) + if (rcu_nocb_poll || !t) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeNotPoll")); return; + } len = atomic_long_read(&rdp->nocb_q_count); if (old_rhpp == &rdp->nocb_head) { wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ rdp->qlen_last_fqs_check = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else if (len > rdp->qlen_last_fqs_check + qhimark) { wake_up_process(t); /* ... or if many callbacks queued. */ rdp->qlen_last_fqs_check = LONG_MAX / 2; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); + } else { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } return; } @@ -2140,10 +2154,12 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, (unsigned long)rhp->func, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); else trace_rcu_callback(rdp->rsp->name, rhp, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); return 1; } @@ -2221,6 +2237,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) static int rcu_nocb_kthread(void *arg) { int c, cl; + bool firsttime = 1; struct rcu_head *list; struct rcu_head *next; struct rcu_head **tail; @@ -2229,14 +2246,27 @@ static int rcu_nocb_kthread(void *arg) /* Each pass through this loop invokes one batch of callbacks */ for (;;) { /* If not polling, wait for next batch of callbacks. */ - if (!rcu_nocb_poll) + if (!rcu_nocb_poll) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Sleep")); wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); + } else if (firsttime) { + firsttime = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Poll")); + } list = ACCESS_ONCE(rdp->nocb_head); if (!list) { + if (!rcu_nocb_poll) + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeEmpty")); schedule_timeout_interruptible(1); flush_signals(current); continue; } + firsttime = 1; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeNonEmpty")); /* * Extract queued callbacks, update counts, and wait @@ -2257,7 +2287,11 @@ static int rcu_nocb_kthread(void *arg) next = list->next; /* Wait for enqueuing to complete, if needed. */ while (next == NULL && &list->next != tail) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WaitQueue")); schedule_timeout_interruptible(1); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeQueue")); next = list->next; } debug_rcu_head_unqueue(list); diff --git a/kernel/rcutree_trace.c b/kernel/rcu/tree_trace.c index cf6c1741293..3596797b7e4 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #define RCU_TREE_NONCORE -#include "rcutree.h" +#include "tree.h" static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) diff --git a/kernel/rcupdate.c b/kernel/rcu/update.c index b02a339836b..6cb3dff89e2 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcu/update.c @@ -53,6 +53,12 @@ #include "rcu.h" +MODULE_ALIAS("rcupdate"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcupdate." + module_param(rcu_expedited, int, 0); #ifdef CONFIG_PREEMPT_RCU @@ -148,7 +154,7 @@ int rcu_read_lock_bh_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -298,7 +304,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ -int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; +static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2f06f3c6a3..8b80f1bae21 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -190,7 +190,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ -static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; +static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 38959c86678..662c5798a68 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -33,29 +33,64 @@ struct ce_unbind { int res; }; -/** - * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds - * @latch: value to convert - * @evt: pointer to clock event device descriptor - * - * Math helper, returns latch value converted to nanoseconds (bound checked) - */ -u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, + bool ismax) { u64 clc = (u64) latch << evt->shift; + u64 rnd; if (unlikely(!evt->mult)) { evt->mult = 1; WARN_ON(1); } + rnd = (u64) evt->mult - 1; + + /* + * Upper bound sanity check. If the backwards conversion is + * not equal latch, we know that the above shift overflowed. + */ + if ((clc >> evt->shift) != (u64)latch) + clc = ~0ULL; + + /* + * Scaled math oddities: + * + * For mult <= (1 << shift) we can safely add mult - 1 to + * prevent integer rounding loss. So the backwards conversion + * from nsec to device ticks will be correct. + * + * For mult > (1 << shift), i.e. device frequency is > 1GHz we + * need to be careful. Adding mult - 1 will result in a value + * which when converted back to device ticks can be larger + * than latch by up to (mult - 1) >> shift. For the min_delta + * calculation we still want to apply this in order to stay + * above the minimum device ticks limit. For the upper limit + * we would end up with a latch value larger than the upper + * limit of the device, so we omit the add to stay below the + * device upper boundary. + * + * Also omit the add if it would overflow the u64 boundary. + */ + if ((~0ULL - clc > rnd) && + (!ismax || evt->mult <= (1U << evt->shift))) + clc += rnd; do_div(clc, evt->mult); - if (clc < 1000) - clc = 1000; - if (clc > KTIME_MAX) - clc = KTIME_MAX; - return clc; + /* Deltas less than 1usec are pointless noise */ + return clc > 1000 ? clc : 1000; +} + +/** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds + * @latch: value to convert + * @evt: pointer to clock event device descriptor + * + * Math helper, returns latch value converted to nanoseconds (bound checked) + */ +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +{ + return cev_delta2ns(latch, evt, false); } EXPORT_SYMBOL_GPL(clockevent_delta2ns); @@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq) sec = 600; clockevents_calc_mult_shift(dev, freq, sec); - dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); - dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); + dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); + dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); } /** diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 80c36bcf66e..78e27e3b52a 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -26,7 +26,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, { /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) return -EPERM; /* No tracing, just counting, so no obvious leak */ |