diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 17 | ||||
-rw-r--r-- | kernel/cgroup.c | 62 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/kexec.c | 6 | ||||
-rw-r--r-- | kernel/lockdep.c | 84 | ||||
-rw-r--r-- | kernel/lockdep_internals.h | 72 | ||||
-rw-r--r-- | kernel/lockdep_proc.c | 58 | ||||
-rw-r--r-- | kernel/module.c | 8 | ||||
-rw-r--r-- | kernel/profile.c | 4 | ||||
-rw-r--r-- | kernel/ptrace.c | 11 | ||||
-rw-r--r-- | kernel/rcupdate.c | 30 | ||||
-rw-r--r-- | kernel/rcutiny.c | 35 | ||||
-rw-r--r-- | kernel/rcutiny_plugin.h | 39 | ||||
-rw-r--r-- | kernel/rcutorture.c | 2 | ||||
-rw-r--r-- | kernel/rcutree.c | 131 | ||||
-rw-r--r-- | kernel/rcutree.h | 2 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 69 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 4 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/sched_debug.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 2 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 101 | ||||
-rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 30 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 2 |
26 files changed, 556 insertions, 225 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 24f8c81fc48..e4c0e1fee9b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -353,17 +353,18 @@ restart: void acct_exit_ns(struct pid_namespace *ns) { - struct bsd_acct_struct *acct; + struct bsd_acct_struct *acct = ns->bacct; - spin_lock(&acct_lock); - acct = ns->bacct; - if (acct != NULL) { - if (acct->file != NULL) - acct_file_reopen(acct, NULL, NULL); + if (acct == NULL) + return; - kfree(acct); - } + del_timer_sync(&acct->timer); + spin_lock(&acct_lock); + if (acct->file != NULL) + acct_file_reopen(acct, NULL, NULL); spin_unlock(&acct_lock); + + kfree(acct); } /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e2769e13980..6d870f2d122 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1646,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry) int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) { char *start; - struct dentry *dentry = rcu_dereference(cgrp->dentry); + struct dentry *dentry = rcu_dereference_check(cgrp->dentry, + rcu_read_lock_held() || + cgroup_lock_is_held()); if (!dentry || cgrp == dummytop) { /* @@ -1662,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) *--start = '\0'; for (;;) { int len = dentry->d_name.len; + if ((start -= len) < buf) return -ENAMETOOLONG; - memcpy(start, cgrp->dentry->d_name.name, len); + memcpy(start, dentry->d_name.name, len); cgrp = cgrp->parent; if (!cgrp) break; - dentry = rcu_dereference(cgrp->dentry); + + dentry = rcu_dereference_check(cgrp->dentry, + rcu_read_lock_held() || + cgroup_lock_is_held()); if (!cgrp->parent) continue; if (--start < buf) @@ -4429,7 +4435,15 @@ __setup("cgroup_disable=", cgroup_disable); */ unsigned short css_id(struct cgroup_subsys_state *css) { - struct css_id *cssid = rcu_dereference(css->id); + struct css_id *cssid; + + /* + * This css_id() can return correct value when somone has refcnt + * on this or this is under rcu_read_lock(). Once css->id is allocated, + * it's unchanged until freed. + */ + cssid = rcu_dereference_check(css->id, + rcu_read_lock_held() || atomic_read(&css->refcnt)); if (cssid) return cssid->id; @@ -4439,7 +4453,10 @@ EXPORT_SYMBOL_GPL(css_id); unsigned short css_depth(struct cgroup_subsys_state *css) { - struct css_id *cssid = rcu_dereference(css->id); + struct css_id *cssid; + + cssid = rcu_dereference_check(css->id, + rcu_read_lock_held() || atomic_read(&css->refcnt)); if (cssid) return cssid->depth; @@ -4447,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css) } EXPORT_SYMBOL_GPL(css_depth); +/** + * css_is_ancestor - test "root" css is an ancestor of "child" + * @child: the css to be tested. + * @root: the css supporsed to be an ancestor of the child. + * + * Returns true if "root" is an ancestor of "child" in its hierarchy. Because + * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). + * But, considering usual usage, the csses should be valid objects after test. + * Assuming that the caller will do some action to the child if this returns + * returns true, the caller must take "child";s reference count. + * If "child" is valid object and this returns true, "root" is valid, too. + */ + bool css_is_ancestor(struct cgroup_subsys_state *child, const struct cgroup_subsys_state *root) { - struct css_id *child_id = rcu_dereference(child->id); - struct css_id *root_id = rcu_dereference(root->id); + struct css_id *child_id; + struct css_id *root_id; + bool ret = true; - if (!child_id || !root_id || (child_id->depth < root_id->depth)) - return false; - return child_id->stack[root_id->depth] == root_id->id; + rcu_read_lock(); + child_id = rcu_dereference(child->id); + root_id = rcu_dereference(root->id); + if (!child_id + || !root_id + || (child_id->depth < root_id->depth) + || (child_id->stack[root_id->depth] != root_id->id)) + ret = false; + rcu_read_unlock(); + return ret; } static void __free_css_id_cb(struct rcu_head *head) @@ -4555,13 +4593,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, { int subsys_id, i, depth = 0; struct cgroup_subsys_state *parent_css, *child_css; - struct css_id *child_id, *parent_id = NULL; + struct css_id *child_id, *parent_id; subsys_id = ss->subsys_id; parent_css = parent->subsys[subsys_id]; child_css = child->subsys[subsys_id]; - depth = css_depth(parent_css) + 1; parent_id = parent_css->id; + depth = parent_id->depth; child_id = get_new_cssid(ss, depth); if (IS_ERR(child_id)) diff --git a/kernel/fork.c b/kernel/fork.c index 5d3592deaf7..4d57d9e3a6e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1111,7 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif - p->stack_start = stack_start; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/kexec.c b/kernel/kexec.c index 87ebe8adc47..474a84715ea 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1134,11 +1134,9 @@ int crash_shrink_memory(unsigned long new_size) free_reserved_phys_range(end, crashk_res.end); - if (start == end) { - crashk_res.end = end; + if (start == end) release_resource(&crashk_res); - } else - crashk_res.end = end - 1; + crashk_res.end = end - 1; unlock: mutex_unlock(&kexec_mutex); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e9c759f06c1..ec21304856d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -431,20 +431,7 @@ static struct stack_trace lockdep_init_trace = { /* * Various lockdep statistics: */ -atomic_t chain_lookup_hits; -atomic_t chain_lookup_misses; -atomic_t hardirqs_on_events; -atomic_t hardirqs_off_events; -atomic_t redundant_hardirqs_on; -atomic_t redundant_hardirqs_off; -atomic_t softirqs_on_events; -atomic_t softirqs_off_events; -atomic_t redundant_softirqs_on; -atomic_t redundant_softirqs_off; -atomic_t nr_unused_locks; -atomic_t nr_cyclic_checks; -atomic_t nr_find_usage_forwards_checks; -atomic_t nr_find_usage_backwards_checks; +DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); #endif /* @@ -748,7 +735,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) return NULL; } class = lock_classes + nr_lock_classes++; - debug_atomic_inc(&nr_unused_locks); + debug_atomic_inc(nr_unused_locks); class->key = key; class->name = lock->name; class->subclass = subclass; @@ -818,7 +805,8 @@ static struct lock_list *alloc_list_entry(void) * Add a new dependency to the head of the list: */ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, - struct list_head *head, unsigned long ip, int distance) + struct list_head *head, unsigned long ip, + int distance, struct stack_trace *trace) { struct lock_list *entry; /* @@ -829,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, if (!entry) return 0; - if (!save_trace(&entry->trace)) - return 0; - entry->class = this; entry->distance = distance; + entry->trace = *trace; /* * Since we never remove from the dependency list, the list can * be walked lockless by other CPUs, it's only allocation @@ -1205,7 +1191,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target, { int result; - debug_atomic_inc(&nr_cyclic_checks); + debug_atomic_inc(nr_cyclic_checks); result = __bfs_forwards(root, target, class_equal, target_entry); @@ -1242,7 +1228,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, { int result; - debug_atomic_inc(&nr_find_usage_forwards_checks); + debug_atomic_inc(nr_find_usage_forwards_checks); result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); @@ -1265,7 +1251,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, { int result; - debug_atomic_inc(&nr_find_usage_backwards_checks); + debug_atomic_inc(nr_find_usage_backwards_checks); result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); @@ -1635,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance) + struct held_lock *next, int distance, int trylock_loop) { struct lock_list *entry; int ret; struct lock_list this; struct lock_list *uninitialized_var(target_entry); + /* + * Static variable, serialized by the graph_lock(). + * + * We use this static variable to save the stack trace in case + * we call into this function multiple times due to encountering + * trylocks in the held lock stack. + */ + static struct stack_trace trace; /* * Prove that the new <prev> -> <next> dependency would not @@ -1688,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, } } + if (!trylock_loop && !save_trace(&trace)) + return 0; + /* * Ok, all validations passed, add the new lock * to the previous lock's dependency list: */ ret = add_lock_to_list(hlock_class(prev), hlock_class(next), &hlock_class(prev)->locks_after, - next->acquire_ip, distance); + next->acquire_ip, distance, &trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(next), hlock_class(prev), &hlock_class(next)->locks_before, - next->acquire_ip, distance); + next->acquire_ip, distance, &trace); if (!ret) return 0; @@ -1731,6 +1728,7 @@ static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; + int trylock_loop = 0; struct held_lock *hlock; /* @@ -1756,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) * added: */ if (hlock->read != 2) { - if (!check_prev_add(curr, hlock, next, distance)) + if (!check_prev_add(curr, hlock, next, + distance, trylock_loop)) return 0; /* * Stop after the first non-trylock entry, @@ -1779,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) if (curr->held_locks[depth].irq_context != curr->held_locks[depth-1].irq_context) break; + trylock_loop = 1; } return 1; out_bug: @@ -1825,7 +1825,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, list_for_each_entry(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: - debug_atomic_inc(&chain_lookup_hits); + debug_atomic_inc(chain_lookup_hits); if (very_verbose(class)) printk("\nhash chain already cached, key: " "%016Lx tail class: [%p] %s\n", @@ -1890,7 +1890,7 @@ cache_hit: chain_hlocks[chain->base + j] = class - lock_classes; } list_add_tail_rcu(&chain->entry, hash_head); - debug_atomic_inc(&chain_lookup_misses); + debug_atomic_inc(chain_lookup_misses); inc_chains(); return 1; @@ -2311,7 +2311,12 @@ void trace_hardirqs_on_caller(unsigned long ip) return; if (unlikely(curr->hardirqs_enabled)) { - debug_atomic_inc(&redundant_hardirqs_on); + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but loosing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); return; } /* we'll do an OFF -> ON transition: */ @@ -2338,7 +2343,7 @@ void trace_hardirqs_on_caller(unsigned long ip) curr->hardirq_enable_ip = ip; curr->hardirq_enable_event = ++curr->irq_events; - debug_atomic_inc(&hardirqs_on_events); + debug_atomic_inc(hardirqs_on_events); } EXPORT_SYMBOL(trace_hardirqs_on_caller); @@ -2370,9 +2375,9 @@ void trace_hardirqs_off_caller(unsigned long ip) curr->hardirqs_enabled = 0; curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; - debug_atomic_inc(&hardirqs_off_events); + debug_atomic_inc(hardirqs_off_events); } else - debug_atomic_inc(&redundant_hardirqs_off); + debug_atomic_inc(redundant_hardirqs_off); } EXPORT_SYMBOL(trace_hardirqs_off_caller); @@ -2396,7 +2401,7 @@ void trace_softirqs_on(unsigned long ip) return; if (curr->softirqs_enabled) { - debug_atomic_inc(&redundant_softirqs_on); + debug_atomic_inc(redundant_softirqs_on); return; } @@ -2406,7 +2411,7 @@ void trace_softirqs_on(unsigned long ip) curr->softirqs_enabled = 1; curr->softirq_enable_ip = ip; curr->softirq_enable_event = ++curr->irq_events; - debug_atomic_inc(&softirqs_on_events); + debug_atomic_inc(softirqs_on_events); /* * We are going to turn softirqs on, so set the * usage bit for all held locks, if hardirqs are @@ -2436,10 +2441,10 @@ void trace_softirqs_off(unsigned long ip) curr->softirqs_enabled = 0; curr->softirq_disable_ip = ip; curr->softirq_disable_event = ++curr->irq_events; - debug_atomic_inc(&softirqs_off_events); + debug_atomic_inc(softirqs_off_events); DEBUG_LOCKS_WARN_ON(!softirq_count()); } else - debug_atomic_inc(&redundant_softirqs_off); + debug_atomic_inc(redundant_softirqs_off); } static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) @@ -2644,7 +2649,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; break; case LOCK_USED: - debug_atomic_dec(&nr_unused_locks); + debug_atomic_dec(nr_unused_locks); break; default: if (!debug_locks_off_graph_unlock()) @@ -2750,7 +2755,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!class) return 0; } - debug_atomic_inc((atomic_t *)&class->ops); + atomic_inc((atomic_t *)&class->ops); if (very_verbose(class)) { printk("\nacquire class [%p] %s", class->key, class->name); if (class->name_version > 1) @@ -3801,8 +3806,11 @@ void lockdep_rcu_dereference(const char *file, const int line) { struct task_struct *curr = current; +#ifndef CONFIG_PROVE_RCU_REPEATEDLY if (!debug_locks_off()) return; +#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ + /* Note: the following can be executed concurrently, so be careful. */ printk("\n===================================================\n"); printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); printk( "---------------------------------------------------\n"); diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index a2ee95ad131..4f560cfedc8 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -110,30 +110,60 @@ lockdep_count_backward_deps(struct lock_class *class) #endif #ifdef CONFIG_DEBUG_LOCKDEP + +#include <asm/local.h> /* - * Various lockdep statistics: + * Various lockdep statistics. + * We want them per cpu as they are often accessed in fast path + * and we want to avoid too much cache bouncing. */ -extern atomic_t chain_lookup_hits; -extern atomic_t chain_lookup_misses; -extern atomic_t hardirqs_on_events; -extern atomic_t hardirqs_off_events; -extern atomic_t redundant_hardirqs_on; -extern atomic_t redundant_hardirqs_off; -extern atomic_t softirqs_on_events; -extern atomic_t softirqs_off_events; -extern atomic_t redundant_softirqs_on; -extern atomic_t redundant_softirqs_off; -extern atomic_t nr_unused_locks; -extern atomic_t nr_cyclic_checks; -extern atomic_t nr_cyclic_check_recursions; -extern atomic_t nr_find_usage_forwards_checks; -extern atomic_t nr_find_usage_forwards_recursions; -extern atomic_t nr_find_usage_backwards_checks; -extern atomic_t nr_find_usage_backwards_recursions; -# define debug_atomic_inc(ptr) atomic_inc(ptr) -# define debug_atomic_dec(ptr) atomic_dec(ptr) -# define debug_atomic_read(ptr) atomic_read(ptr) +struct lockdep_stats { + int chain_lookup_hits; + int chain_lookup_misses; + int hardirqs_on_events; + int hardirqs_off_events; + int redundant_hardirqs_on; + int redundant_hardirqs_off; + int softirqs_on_events; + int softirqs_off_events; + int redundant_softirqs_on; + int redundant_softirqs_off; + int nr_unused_locks; + int nr_cyclic_checks; + int nr_cyclic_check_recursions; + int nr_find_usage_forwards_checks; + int nr_find_usage_forwards_recursions; + int nr_find_usage_backwards_checks; + int nr_find_usage_backwards_recursions; +}; + +DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); + +#define __debug_atomic_inc(ptr) \ + this_cpu_inc(lockdep_stats.ptr); + +#define debug_atomic_inc(ptr) { \ + WARN_ON_ONCE(!irqs_disabled()); \ + __this_cpu_inc(lockdep_stats.ptr); \ +} + +#define debug_atomic_dec(ptr) { \ + WARN_ON_ONCE(!irqs_disabled()); \ + __this_cpu_dec(lockdep_stats.ptr); \ +} + +#define debug_atomic_read(ptr) ({ \ + struct lockdep_stats *__cpu_lockdep_stats; \ + unsigned long long __total = 0; \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \ + __total += __cpu_lockdep_stats->ptr; \ + } \ + __total; \ +}) #else +# define __debug_atomic_inc(ptr) do { } while (0) # define debug_atomic_inc(ptr) do { } while (0) # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index d4aba4f3584..59b76c8ce9d 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = { static void lockdep_stats_debug_show(struct seq_file *m) { #ifdef CONFIG_DEBUG_LOCKDEP - unsigned int hi1 = debug_atomic_read(&hardirqs_on_events), - hi2 = debug_atomic_read(&hardirqs_off_events), - hr1 = debug_atomic_read(&redundant_hardirqs_on), - hr2 = debug_atomic_read(&redundant_hardirqs_off), - si1 = debug_atomic_read(&softirqs_on_events), - si2 = debug_atomic_read(&softirqs_off_events), - sr1 = debug_atomic_read(&redundant_softirqs_on), - sr2 = debug_atomic_read(&redundant_softirqs_off); - - seq_printf(m, " chain lookup misses: %11u\n", - debug_atomic_read(&chain_lookup_misses)); - seq_printf(m, " chain lookup hits: %11u\n", - debug_atomic_read(&chain_lookup_hits)); - seq_printf(m, " cyclic checks: %11u\n", - debug_atomic_read(&nr_cyclic_checks)); - seq_printf(m, " find-mask forwards checks: %11u\n", - debug_atomic_read(&nr_find_usage_forwards_checks)); - seq_printf(m, " find-mask backwards checks: %11u\n", - debug_atomic_read(&nr_find_usage_backwards_checks)); - - seq_printf(m, " hardirq on events: %11u\n", hi1); - seq_printf(m, " hardirq off events: %11u\n", hi2); - seq_printf(m, " redundant hardirq ons: %11u\n", hr1); - seq_printf(m, " redundant hardirq offs: %11u\n", hr2); - seq_printf(m, " softirq on events: %11u\n", si1); - seq_printf(m, " softirq off events: %11u\n", si2); - seq_printf(m, " redundant softirq ons: %11u\n", sr1); - seq_printf(m, " redundant softirq offs: %11u\n", sr2); + unsigned long long hi1 = debug_atomic_read(hardirqs_on_events), + hi2 = debug_atomic_read(hardirqs_off_events), + hr1 = debug_atomic_read(redundant_hardirqs_on), + hr2 = debug_atomic_read(redundant_hardirqs_off), + si1 = debug_atomic_read(softirqs_on_events), + si2 = debug_atomic_read(softirqs_off_events), + sr1 = debug_atomic_read(redundant_softirqs_on), + sr2 = debug_atomic_read(redundant_softirqs_off); + + seq_printf(m, " chain lookup misses: %11llu\n", + debug_atomic_read(chain_lookup_misses)); + seq_printf(m, " chain lookup hits: %11llu\n", + debug_atomic_read(chain_lookup_hits)); + seq_printf(m, " cyclic checks: %11llu\n", + debug_atomic_read(nr_cyclic_checks)); + seq_printf(m, " find-mask forwards checks: %11llu\n", + debug_atomic_read(nr_find_usage_forwards_checks)); + seq_printf(m, " find-mask backwards checks: %11llu\n", + debug_atomic_read(nr_find_usage_backwards_checks)); + + seq_printf(m, " hardirq on events: %11llu\n", hi1); + seq_printf(m, " hardirq off events: %11llu\n", hi2); + seq_printf(m, " redundant hardirq ons: %11llu\n", hr1); + seq_printf(m, " redundant hardirq offs: %11llu\n", hr2); + seq_printf(m, " softirq on events: %11llu\n", si1); + seq_printf(m, " softirq off events: %11llu\n", si2); + seq_printf(m, " redundant softirq ons: %11llu\n", sr1); + seq_printf(m, " redundant softirq offs: %11llu\n", sr2); #endif } @@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) #endif } #ifdef CONFIG_DEBUG_LOCKDEP - DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); + DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused); #endif seq_printf(m, " lock-classes: %11lu [max: %lu]\n", nr_lock_classes, MAX_LOCKDEP_KEYS); diff --git a/kernel/module.c b/kernel/module.c index 1016b75b026..b8a1e313448 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -59,8 +59,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/module.h> -EXPORT_TRACEPOINT_SYMBOL(module_get); - #if 0 #define DEBUGP printk #else @@ -515,6 +513,9 @@ MODINFO_ATTR(srcversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD + +EXPORT_TRACEPOINT_SYMBOL(module_get); + /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { @@ -867,8 +868,7 @@ void module_put(struct module *module) smp_wmb(); /* see comment in module_refcount */ __this_cpu_inc(module->refptr->decs); - trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->decs)); + trace_module_put(module, _RET_IP_); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); diff --git a/kernel/profile.c b/kernel/profile.c index a55d3a367ae..dfadc5b729f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -127,8 +127,10 @@ int __ref profile_init(void) return 0; prof_buffer = vmalloc(buffer_bytes); - if (prof_buffer) + if (prof_buffer) { + memset(prof_buffer, 0, buffer_bytes); return 0; + } free_cpumask_var(prof_cpu_mask); return -ENOMEM; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9fb51237b18..6af9cdd558b 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -14,7 +14,6 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/ptrace.h> #include <linux/security.h> #include <linux/signal.h> @@ -665,10 +664,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) struct task_struct *child; long ret; - /* - * This lock_kernel fixes a subtle race with suid exec - */ - lock_kernel(); if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); if (!ret) @@ -702,7 +697,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) out_put_task_struct: put_task_struct(child); out: - unlock_kernel(); return ret; } @@ -812,10 +806,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, struct task_struct *child; long ret; - /* - * This lock_kernel fixes a subtle race with suid exec - */ - lock_kernel(); if (request == PTRACE_TRACEME) { ret = ptrace_traceme(); goto out; @@ -845,7 +835,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, out_put_task_struct: put_task_struct(child); out: - unlock_kernel(); return ret; } #endif /* CONFIG_COMPAT */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 03a7ea1579f..72a8dc9567f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/module.h> -#include <linux/kernel_stat.h> #include <linux/hardirq.h> #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map = EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); - #ifdef CONFIG_DEBUG_LOCK_ALLOC int debug_lockdep_rcu_enabled(void) @@ -97,21 +93,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. - */ -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} - -/* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. */ @@ -122,3 +103,14 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } + +#ifdef CONFIG_PROVE_RCU +/* + * wrapper function to avoid #include problems. + */ +int rcu_my_thread_group_empty(void) +{ + return thread_group_empty(current); +} +EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); +#endif /* #ifdef CONFIG_PROVE_RCU */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 9f6d9ff2572..38729d3cd23 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -44,9 +44,9 @@ struct rcu_ctrlblk { }; /* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_ctrlblk = { - .donetail = &rcu_ctrlblk.rcucblist, - .curtail = &rcu_ctrlblk.rcucblist, +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { @@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { .curtail = &rcu_bh_ctrlblk.rcucblist, }; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -108,7 +113,8 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) */ void rcu_sched_qs(int cpu) { - if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + if (rcu_qsctr_help(&rcu_sched_ctrlblk) + + rcu_qsctr_help(&rcu_bh_ctrlblk)) raise_softirq(RCU_SOFTIRQ); } @@ -173,7 +179,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_process_callbacks(struct softirq_action *unused) { - __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); } @@ -187,7 +193,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) * * Cool, huh? (Due to Josh Triplett.) * - * But we want to make this a static inline later. + * But we want to make this a static inline later. The cond_resched() + * currently makes this problematic. */ void synchronize_sched(void) { @@ -195,12 +202,6 @@ void synchronize_sched(void) } EXPORT_SYMBOL_GPL(synchronize_sched); -void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - /* * Helper function for call_rcu() and call_rcu_bh(). */ @@ -226,7 +227,7 @@ static void __call_rcu(struct rcu_head *head, */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_ctrlblk); + __call_rcu(head, func, &rcu_sched_ctrlblk); } EXPORT_SYMBOL_GPL(call_rcu); @@ -244,11 +245,13 @@ void rcu_barrier(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -256,11 +259,13 @@ void rcu_barrier_bh(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_bh(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -268,11 +273,13 @@ void rcu_barrier_sched(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); @@ -280,3 +287,5 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } + +#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h new file mode 100644 index 00000000000..d223a92bc74 --- /dev/null +++ b/kernel/rcutiny_plugin.h @@ -0,0 +1,39 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions that provide either classic + * or preemptable semantics. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +#include <linux/kernel_stat.h> + +/* + * During boot, we forgive RCU lockdep issues. After this function is + * invoked, we start taking RCU lockdep issues seriously. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 58df55bf83e..077defb3457 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void) { struct rcu_bh_torture_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } static struct rcu_torture_ops rcu_bh_ops = { diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3ec8160fc75..d4437345706 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,7 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/time.h> +#include <linux/kernel_stat.h> #include "rcutree.h" @@ -53,8 +54,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; -#define RCU_STATE_INITIALIZER(name) { \ - .level = { &name.node[0] }, \ +#define RCU_STATE_INITIALIZER(structname) { \ + .level = { &structname.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. */ \ NUM_RCU_LVL_1, \ @@ -65,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ .orphan_cbs_list = NULL, \ - .orphan_cbs_tail = &name.orphan_cbs_list, \ + .orphan_cbs_tail = &structname.orphan_cbs_list, \ .orphan_qlen = 0, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ + .name = #structname, \ } struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); @@ -80,6 +82,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -97,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; - rcu_preempt_note_context_switch(cpu); } void rcu_bh_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } +/* + * Note a context switch. This is a quiescent state for RCU-sched, + * and requires special handling for preemptible RCU. + */ +void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(cpu); +} + #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, @@ -438,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +int rcu_cpu_stall_panicking __read_mostly; + static void record_gp_stall_check_time(struct rcu_state *rsp) { rsp->gp_start = jiffies; @@ -470,7 +484,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* OK, time to rat on our buddy... */ - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", + rsp->name); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); rcu_print_task_stall(rnp); @@ -481,7 +496,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) if (rnp->qsmask & (1UL << cpu)) printk(" %d", rnp->grplo + cpu); } - printk(" (detected by %d, t=%ld jiffies)\n", + printk("} (detected by %d, t=%ld jiffies)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start)); trigger_all_cpu_backtrace(); @@ -497,8 +512,8 @@ static void print_cpu_stall(struct rcu_state *rsp) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", - smp_processor_id(), jiffies - rsp->gp_start); + printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", + rsp->name, smp_processor_id(), jiffies - rsp->gp_start); trigger_all_cpu_backtrace(); raw_spin_lock_irqsave(&rnp->lock, flags); @@ -515,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) long delta; struct rcu_node *rnp; + if (rcu_cpu_stall_panicking) + return; delta = jiffies - rsp->jiffies_stall; rnp = rdp->mynode; if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { @@ -529,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) } } +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_panicking = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static void __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); +} + #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void record_gp_stall_check_time(struct rcu_state *rsp) @@ -539,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +static void __init check_cpu_stall_init(void) +{ +} + #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -1125,8 +1161,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { - if (!rcu_pending(cpu)) - return; /* if nothing for RCU to do. */ if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1158,7 +1192,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_bh_qs(cpu); } rcu_preempt_check_callbacks(cpu); - raise_softirq(RCU_SOFTIRQ); + if (rcu_pending(cpu)) + raise_softirq(RCU_SOFTIRQ); } #ifdef CONFIG_SMP @@ -1236,11 +1271,11 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ + /* Record dyntick-idle state. */ force_qs_rnp(rsp, dyntick_save_progress_counter); raw_spin_lock(&rnp->lock); /* irqs already disabled */ @@ -1449,11 +1484,13 @@ void synchronize_sched(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -1473,11 +1510,13 @@ void synchronize_rcu_bh(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_bh(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -1498,8 +1537,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) { + if (rdp->qs_pending && !rdp->passed_quiesc) { + + /* + * If force_quiescent_state() coming soon and this CPU + * needs a quiescent state, and this is either RCU-sched + * or RCU-bh, force a local reschedule. + */ rdp->n_rp_qs_pending++; + if (!rdp->preemptable && + ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, + jiffies)) + set_need_resched(); + } else if (rdp->qs_pending && rdp->passed_quiesc) { + rdp->n_rp_report_qs++; return 1; } @@ -1767,6 +1818,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, } /* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. This function also enables RCU lockdep checking. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + +/* * Compute the per-level fanout, either using the exact fanout specified * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. */ @@ -1849,6 +1915,14 @@ static void __init rcu_init_one(struct rcu_state *rsp) INIT_LIST_HEAD(&rnp->blocked_tasks[3]); } } + + rnp = rsp->level[NUM_RCU_LVLS - 1]; + for_each_possible_cpu(i) { + while (i > rnp->grphi) + rnp++; + rsp->rda[i]->mynode = rnp; + rcu_boot_init_percpu_data(i, rsp); + } } /* @@ -1859,19 +1933,11 @@ static void __init rcu_init_one(struct rcu_state *rsp) #define RCU_INIT_FLAVOR(rsp, rcu_data) \ do { \ int i; \ - int j; \ - struct rcu_node *rnp; \ \ - rcu_init_one(rsp); \ - rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ - j = 0; \ for_each_possible_cpu(i) { \ - if (i > rnp[j].grphi) \ - j++; \ - per_cpu(rcu_data, i).mynode = &rnp[j]; \ (rsp)->rda[i] = &per_cpu(rcu_data, i); \ - rcu_boot_init_percpu_data(i, rsp); \ } \ + rcu_init_one(rsp); \ } while (0) void __init rcu_init(void) @@ -1879,12 +1945,6 @@ void __init rcu_init(void) int cpu; rcu_bootup_announce(); -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#if NUM_RCU_LVL_4 != 0 - printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); -#endif /* #if NUM_RCU_LVL_4 != 0 */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); @@ -1898,6 +1958,7 @@ void __init rcu_init(void) cpu_notifier(rcu_cpu_notify, 0); for_each_online_cpu(cpu) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); + check_cpu_stall_init(); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a525a30e08..14c040b18ed 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -223,6 +223,7 @@ struct rcu_data { /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ unsigned long n_rp_qs_pending; + unsigned long n_rp_report_qs; unsigned long n_rp_cb_ready; unsigned long n_rp_cpu_needs_gp; unsigned long n_rp_gp_completed; @@ -326,6 +327,7 @@ struct rcu_state { unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + char *name; /* Name of structure. */ }; /* Return values for rcu_preempt_offline_tasks(). */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 79b53bda894..0e4f420245d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -26,6 +26,45 @@ #include <linux/delay.h> +/* + * Check the RCU kernel configuration parameters and print informative + * messages about anything out of the ordinary. If you like #ifdef, you + * will love this function. + */ +static void __init rcu_bootup_announce_oddness(void) +{ +#ifdef CONFIG_RCU_TRACE + printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); +#endif +#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) + printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", + CONFIG_RCU_FANOUT); +#endif +#ifdef CONFIG_RCU_FANOUT_EXACT + printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); +#endif +#ifdef CONFIG_RCU_FAST_NO_HZ + printk(KERN_INFO + "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); +#endif +#ifdef CONFIG_PROVE_RCU + printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); +#endif +#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE + printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); +#endif +#ifndef CONFIG_RCU_CPU_STALL_DETECTOR + printk(KERN_INFO + "\tRCU-based detection of stalled CPUs is disabled.\n"); +#endif +#ifndef CONFIG_RCU_CPU_STALL_VERBOSE + printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); +#endif +#if NUM_RCU_LVL_4 != 0 + printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); +#endif +} + #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); @@ -38,8 +77,8 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp); */ static void __init rcu_bootup_announce(void) { - printk(KERN_INFO - "Experimental preemptable hierarchical RCU implementation.\n"); + printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); + rcu_bootup_announce_oddness(); } /* @@ -75,13 +114,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); * that this just means that the task currently running on the CPU is * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. + * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. */ static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; } /* @@ -144,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - rcu_preempt_qs(cpu); local_irq_save(flags); - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + rcu_preempt_qs(cpu); local_irq_restore(flags); } @@ -236,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t) */ special = t->rcu_read_unlock_special; if (special & RCU_READ_UNLOCK_NEED_QS) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; rcu_preempt_qs(smp_processor_id()); } @@ -473,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; rcu_preempt_qs(cpu); return; } @@ -515,11 +557,13 @@ void synchronize_rcu(void) if (!rcu_scheduler_active) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_rcu); @@ -754,6 +798,7 @@ void exit_rcu(void) static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); + rcu_bootup_announce_oddness(); } /* @@ -1008,6 +1053,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); int rcu_needs_cpu(int cpu) { int c = 0; + int snap; + int snap_nmi; int thatcpu; /* Check for being in the holdoff period. */ @@ -1015,12 +1062,18 @@ int rcu_needs_cpu(int cpu) return rcu_needs_cpu_quick_check(cpu); /* Don't bother unless we are the last non-dyntick-idle CPU. */ - for_each_cpu_not(thatcpu, nohz_cpu_mask) - if (thatcpu != cpu) { + for_each_online_cpu(thatcpu) { + if (thatcpu == cpu) + continue; + snap = per_cpu(rcu_dynticks, thatcpu).dynticks; + snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; + smp_mb(); /* Order sampling of snap with end of grace period. */ + if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); } + } /* Check and update the rcu_dyntick_drain sequencing. */ if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d45db2e35d2..36c95b45738 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -241,11 +241,13 @@ static const struct file_operations rcugp_fops = { static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) { seq_printf(m, "%3d%cnp=%ld " - "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + "qsp=%ld rpq=%ld cbr=%ld cng=%ld " + "gpc=%ld gps=%ld nf=%ld nn=%ld\n", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->n_rcu_pending, rdp->n_rp_qs_pending, + rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp, rdp->n_rp_gp_completed, diff --git a/kernel/sched.c b/kernel/sched.c index b11b80a3eed..5cd607ec840 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3663,7 +3663,7 @@ need_resched: preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_sched_qs(cpu); + rcu_note_context_switch(cpu); prev = rq->curr; switch_count = &prev->nivcsw; diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 9b49db14403..19be00ba612 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -114,7 +114,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { char path[64]; + rcu_read_lock(); cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); + rcu_read_unlock(); SEQ_printf(m, " %s", path); } #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 7c1a67ef027..0db913a5c60 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu) preempt_enable_no_resched(); cond_resched(); preempt_disable(); - rcu_sched_qs((long)__bind_cpu); + rcu_note_context_switch((long)__bind_cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41ca394feb2..5885cdfc41f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -319,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) +/* Flag when events were overwritten */ +#define RB_MISSED_EVENTS (1 << 31) +/* Missed count stored at end */ +#define RB_MISSED_STORED (1 << 30) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -338,6 +343,7 @@ struct buffer_page { local_t write; /* index for next write */ unsigned read; /* index for next read */ local_t entries; /* entries on this page */ + unsigned long real_end; /* real end of data */ struct buffer_data_page *page; /* Actual data page */ }; @@ -417,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s) (unsigned int)sizeof(field.commit), (unsigned int)is_signed_type(long)); + ret = trace_seq_printf(s, "\tfield: int overwrite;\t" + "offset:%u;\tsize:%u;\tsigned:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + 1, + (unsigned int)is_signed_type(long)); + ret = trace_seq_printf(s, "\tfield: char data;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), @@ -440,6 +452,8 @@ struct ring_buffer_per_cpu { struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; + unsigned long lost_events; + unsigned long last_overrun; local_t commit_overrun; local_t overrun; local_t entries; @@ -1762,6 +1776,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, kmemcheck_annotate_bitfield(event, bitfield); /* + * Save the original length to the meta data. + * This will be used by the reader to add lost event + * counter. + */ + tail_page->real_end = tail; + + /* * If this event is bigger than the minimum size, then * we need to be careful that we don't subtract the * write counter enough to allow another writer to slip @@ -2838,6 +2859,7 @@ static struct buffer_page * rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = NULL; + unsigned long overwrite; unsigned long flags; int nr_loops = 0; int ret; @@ -2879,6 +2901,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->write, 0); local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); + cpu_buffer->reader_page->real_end = 0; spin: /* @@ -2899,6 +2922,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); /* + * We want to make sure we read the overruns after we set up our + * pointers to the next object. The writer side does a + * cmpxchg to cross pages which acts as the mb on the writer + * side. Note, the reader will constantly fail the swap + * while the writer is updating the pointers, so this + * guarantees that the overwrite recorded here is the one we + * want to compare with the last_overrun. + */ + smp_mb(); + overwrite = local_read(&(cpu_buffer->overrun)); + + /* * Here's the tricky part. * * We need to move the pointer past the header page. @@ -2929,6 +2964,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page = reader; rb_reset_reader_page(cpu_buffer); + if (overwrite != cpu_buffer->last_overrun) { + cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; + cpu_buffer->last_overrun = overwrite; + } + goto again; out: @@ -3005,8 +3045,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) rb_advance_iter(iter); } +static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) +{ + return cpu_buffer->lost_events; +} + static struct ring_buffer_event * -rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) +rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_event *event; struct buffer_page *reader; @@ -3058,6 +3104,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } + if (lost_events) + *lost_events = rb_lost_events(cpu_buffer); return event; default: @@ -3168,12 +3216,14 @@ static inline int rb_ok_to_lock(void) * @buffer: The ring buffer to read * @cpu: The cpu to peak at * @ts: The timestamp counter of this event. + * @lost_events: a variable to store if events were lost (may be NULL) * * This will return the event that will be read next, but does * not consume the data. */ struct ring_buffer_event * -ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) +ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; @@ -3188,7 +3238,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) local_irq_save(flags); if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(cpu_buffer, ts); + event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); if (dolock) @@ -3230,13 +3280,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) /** * ring_buffer_consume - return an event and consume it * @buffer: The ring buffer to get the next event from + * @cpu: the cpu to read the buffer from + * @ts: a variable to store the timestamp (may be NULL) + * @lost_events: a variable to store if events were lost (may be NULL) * * Returns the next event in the ring buffer, and that event is consumed. * Meaning, that sequential reads will keep returning a different event, * and eventually empty the ring buffer if the producer is slower. */ struct ring_buffer_event * -ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) +ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; @@ -3257,9 +3311,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) if (dolock) spin_lock(&cpu_buffer->reader_lock); - event = rb_buffer_peek(cpu_buffer, ts); - if (event) + event = rb_buffer_peek(cpu_buffer, ts, lost_events); + if (event) { + cpu_buffer->lost_events = 0; rb_advance_reader(cpu_buffer); + } if (dolock) spin_unlock(&cpu_buffer->reader_lock); @@ -3408,6 +3464,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->write_stamp = 0; cpu_buffer->read_stamp = 0; + cpu_buffer->lost_events = 0; + cpu_buffer->last_overrun = 0; + rb_head_page_activate(cpu_buffer); } @@ -3683,6 +3742,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, struct ring_buffer_event *event; struct buffer_data_page *bpage; struct buffer_page *reader; + unsigned long missed_events; unsigned long flags; unsigned int commit; unsigned int read; @@ -3719,6 +3779,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, read = reader->read; commit = rb_page_commit(reader); + /* Check if any events were dropped */ + missed_events = cpu_buffer->lost_events; + /* * If this page has been partially read or * if len is not big enough to read the rest of the page or @@ -3779,9 +3842,35 @@ int ring_buffer_read_page(struct ring_buffer *buffer, local_set(&reader->entries, 0); reader->read = 0; *data_page = bpage; + + /* + * Use the real_end for the data size, + * This gives us a chance to store the lost events + * on the page. + */ + if (reader->real_end) + local_set(&bpage->commit, reader->real_end); } ret = read; + cpu_buffer->lost_events = 0; + /* + * Set a flag in the commit field if we lost events + */ + if (missed_events) { + commit = local_read(&bpage->commit); + + /* If there is room at the end of the page to save the + * missed events, then record it there. + */ + if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) { + memcpy(&bpage->data[commit], &missed_events, + sizeof(missed_events)); + local_add(RB_MISSED_STORED, &bpage->commit); + } + local_add(RB_MISSED_EVENTS, &bpage->commit); + } + out_unlock: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index df74c798225..dc56556b55a 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -81,7 +81,7 @@ static enum event_status read_event(int cpu) int *entry; u64 ts; - event = ring_buffer_consume(buffer, cpu, &ts); + event = ring_buffer_consume(buffer, cpu, &ts, NULL); if (!event) return EVENT_DROPPED; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 44f916a0406..60f3b628973 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1545,7 +1545,8 @@ static void trace_iterator_increment(struct trace_iterator *iter) } static struct trace_entry * -peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) +peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, + unsigned long *lost_events) { struct ring_buffer_event *event; struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; @@ -1556,7 +1557,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) if (buf_iter) event = ring_buffer_iter_peek(buf_iter, ts); else - event = ring_buffer_peek(iter->tr->buffer, cpu, ts); + event = ring_buffer_peek(iter->tr->buffer, cpu, ts, + lost_events); ftrace_enable_cpu(); @@ -1564,10 +1566,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) } static struct trace_entry * -__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) +__find_next_entry(struct trace_iterator *iter, int *ent_cpu, + unsigned long *missing_events, u64 *ent_ts) { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; + unsigned long lost_events, next_lost = 0; int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; @@ -1580,7 +1584,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (cpu_file > TRACE_PIPE_ALL_CPU) { if (ring_buffer_empty_cpu(buffer, cpu_file)) return NULL; - ent = peek_next_entry(iter, cpu_file, ent_ts); + ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); if (ent_cpu) *ent_cpu = cpu_file; @@ -1592,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (ring_buffer_empty_cpu(buffer, cpu)) continue; - ent = peek_next_entry(iter, cpu, &ts); + ent = peek_next_entry(iter, cpu, &ts, &lost_events); /* * Pick the entry with the smallest timestamp: @@ -1601,6 +1605,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) next = ent; next_cpu = cpu; next_ts = ts; + next_lost = lost_events; } } @@ -1610,6 +1615,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) if (ent_ts) *ent_ts = next_ts; + if (missing_events) + *missing_events = next_lost; + return next; } @@ -1617,13 +1625,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { - return __find_next_entry(iter, ent_cpu, ent_ts); + return __find_next_entry(iter, ent_cpu, NULL, ent_ts); } /* Find the next real entry, and increment the iterator to the next entry */ static void *find_next_entry_inc(struct trace_iterator *iter) { - iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); + iter->ent = __find_next_entry(iter, &iter->cpu, + &iter->lost_events, &iter->ts); if (iter->ent) trace_iterator_increment(iter); @@ -1635,7 +1644,8 @@ static void trace_consume(struct trace_iterator *iter) { /* Don't allow ftrace to trace into the ring buffers */ ftrace_disable_cpu(); - ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); + ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, + &iter->lost_events); ftrace_enable_cpu(); } @@ -2030,6 +2040,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; + if (iter->lost_events) + trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events); + if (iter->trace && iter->trace->print_line) { ret = iter->trace->print_line(iter); if (ret != TRACE_TYPE_UNHANDLED) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9aed1a5cf55..669b9c31861 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -490,9 +490,10 @@ get_return_for_leaf(struct trace_iterator *iter, * We need to consume the current entry to see * the next one. */ - ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); + ring_buffer_consume(iter->tr->buffer, iter->cpu, + NULL, NULL); event = ring_buffer_peek(iter->tr->buffer, iter->cpu, - NULL); + NULL, NULL); } if (!event) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 1cc9858258b..71fa771ee4d 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -29,7 +29,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) struct trace_entry *entry; unsigned int loops = 0; - while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { + while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) { entry = ring_buffer_event_data(event); /* |