From 1bb539ca36e21c2f4fce0865e11df384bc7b7656 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 May 2013 14:38:43 -0400 Subject: ftrace: Use the rcu _notrace variants for rcu_dereference_raw() and friends As rcu_dereference_raw() under RCU debug config options can add quite a bit of checks, and that tracing uses rcu_dereference_raw(), these checks happen with the function tracer. The function tracer also happens to trace these debug checks too. This added overhead can livelock the system. Have the function tracer use the new RCU _notrace equivalents that do not do the debug checks for RCU. Link: http://lkml.kernel.org/r/20130528184209.467603904@goodmis.org Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b549b0f5b97..6c508ff33c6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -120,22 +120,22 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); /* * Traverse the ftrace_global_list, invoking all entries. The reason that we - * can use rcu_dereference_raw() is that elements removed from this list + * can use rcu_dereference_raw_notrace() is that elements removed from this list * are simply leaked, so there is no need to interact with a grace-period - * mechanism. The rcu_dereference_raw() calls are needed to handle + * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle * concurrent insertions into the ftrace_global_list. * * Silly Alpha and silly pointer-speculation compiler optimizations! */ #define do_for_each_ftrace_op(op, list) \ - op = rcu_dereference_raw(list); \ + op = rcu_dereference_raw_notrace(list); \ do /* * Optimized for just a single item in the list (as that is the normal case). */ #define while_for_each_ftrace_op(op) \ - while (likely(op = rcu_dereference_raw((op)->next)) && \ + while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ unlikely((op) != &ftrace_list_end)) static inline void ftrace_ops_init(struct ftrace_ops *ops) @@ -779,7 +779,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) if (hlist_empty(hhd)) return NULL; - hlist_for_each_entry_rcu(rec, hhd, node) { + hlist_for_each_entry_rcu_notrace(rec, hhd, node) { if (rec->ip == ip) return rec; } @@ -1165,7 +1165,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) hhd = &hash->buckets[key]; - hlist_for_each_entry_rcu(entry, hhd, hlist) { + hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { if (entry->ip == ip) return entry; } @@ -1422,8 +1422,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) struct ftrace_hash *notrace_hash; int ret; - filter_hash = rcu_dereference_raw(ops->filter_hash); - notrace_hash = rcu_dereference_raw(ops->notrace_hash); + filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); + notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); if ((ftrace_hash_empty(filter_hash) || ftrace_lookup_ip(filter_hash, ip)) && @@ -2920,7 +2920,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, * on the hash. rcu_read_lock is too dangerous here. */ preempt_disable_notrace(); - hlist_for_each_entry_rcu(entry, hhd, node) { + hlist_for_each_entry_rcu_notrace(entry, hhd, node) { if (entry->ip == ip) entry->ops->func(ip, parent_ip, &entry->data); } -- cgit v1.2.3-70-g09d2 From 0184d50f9fd17658c232d6ee6d465a87f989d706 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 29 May 2013 15:56:49 -0400 Subject: tracing: Fix bad parameter passed in branch selftest The branch selftest calls trace_test_buffer(), but with the new code it expects the first parameter to be a pointer to a struct trace_buffer. All self tests were changed but the branch selftest was missed. This caused either a crash or failed test when the branch selftest was enabled. Link: http://lkml.kernel.org/r/20130529141333.GA24064@localhost Reported-by: Fengguang Wu Signed-off-by: Steven Rostedt --- kernel/trace/trace_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 55e2cf66967..2901e3b8859 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1159,7 +1159,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) /* stop the tracing. */ tracing_stop(); /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); + ret = trace_test_buffer(&tr->trace_buffer, &count); trace->reset(tr); tracing_start(); -- cgit v1.2.3-70-g09d2 From f17a5194859a82afe4164e938b92035b86c55794 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 30 May 2013 21:10:37 -0400 Subject: tracing: Use current_uid() for critical time tracing The irqsoff tracer records the max time that interrupts are disabled. There are hooks in the assembly code that calls back into the tracer when interrupts are disabled or enabled. When they are enabled, the tracer checks if the amount of time they were disabled is larger than the previous recorded max interrupts off time. If it is, it creates a snapshot of the currently running trace to store where the last largest interrupts off time was held and how it happened. During testing, this RCU lockdep dump appeared: [ 1257.829021] =============================== [ 1257.829021] [ INFO: suspicious RCU usage. ] [ 1257.829021] 3.10.0-rc1-test+ #171 Tainted: G W [ 1257.829021] ------------------------------- [ 1257.829021] /home/rostedt/work/git/linux-trace.git/include/linux/rcupdate.h:780 rcu_read_lock() used illegally while idle! [ 1257.829021] [ 1257.829021] other info that might help us debug this: [ 1257.829021] [ 1257.829021] [ 1257.829021] RCU used illegally from idle CPU! [ 1257.829021] rcu_scheduler_active = 1, debug_locks = 0 [ 1257.829021] RCU used illegally from extended quiescent state! [ 1257.829021] 2 locks held by trace-cmd/4831: [ 1257.829021] #0: (max_trace_lock){......}, at: [] stop_critical_timing+0x1a3/0x209 [ 1257.829021] #1: (rcu_read_lock){.+.+..}, at: [] __update_max_tr+0x88/0x1ee [ 1257.829021] [ 1257.829021] stack backtrace: [ 1257.829021] CPU: 3 PID: 4831 Comm: trace-cmd Tainted: G W 3.10.0-rc1-test+ #171 [ 1257.829021] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 [ 1257.829021] 0000000000000001 ffff880065f49da8 ffffffff8153dd2b ffff880065f49dd8 [ 1257.829021] ffffffff81092a00 ffff88006bd78680 ffff88007add7500 0000000000000003 [ 1257.829021] ffff88006bd78680 ffff880065f49e18 ffffffff810daebf ffffffff810dae5a [ 1257.829021] Call Trace: [ 1257.829021] [] dump_stack+0x19/0x1b [ 1257.829021] [] lockdep_rcu_suspicious+0x109/0x112 [ 1257.829021] [] __update_max_tr+0xed/0x1ee [ 1257.829021] [] ? __update_max_tr+0x88/0x1ee [ 1257.829021] [] ? user_enter+0xfd/0x107 [ 1257.829021] [] update_max_tr_single+0x11d/0x12d [ 1257.829021] [] ? user_enter+0xfd/0x107 [ 1257.829021] [] stop_critical_timing+0x141/0x209 [ 1257.829021] [] ? trace_hardirqs_on+0xd/0xf [ 1257.829021] [] ? user_enter+0xfd/0x107 [ 1257.829021] [] time_hardirqs_on+0x2a/0x2f [ 1257.829021] [] ? user_enter+0xfd/0x107 [ 1257.829021] [] trace_hardirqs_on_caller+0x16/0x197 [ 1257.829021] [] trace_hardirqs_on+0xd/0xf [ 1257.829021] [] user_enter+0xfd/0x107 [ 1257.829021] [] do_notify_resume+0x92/0x97 [ 1257.829021] [] int_signal+0x12/0x17 What happened was entering into the user code, the interrupts were enabled and a max interrupts off was recorded. The trace buffer was saved along with various information about the task: comm, pid, uid, priority, etc. The uid is recorded with task_uid(tsk). But this is a macro that uses rcu_read_lock() to retrieve the data, and this happened to happen where RCU is blind (user_enter). As only the preempt and irqs off tracers can have this happen, and they both only have the tsk == current, if tsk == current, use current_uid() instead of task_uid(), as current_uid() does not use RCU as only current can change its uid. This fixes the RCU suspicious splat. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4d79485b323..1a41023a1f8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -843,7 +843,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); max_data->pid = tsk->pid; - max_data->uid = task_uid(tsk); + /* + * If tsk == current, then use current_uid(), as that does not use + * RCU. The irq tracer can be called out of RCU scope. + */ + if (tsk == current) + max_data->uid = current_uid(); + else + max_data->uid = task_uid(tsk); + max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; max_data->policy = tsk->policy; max_data->rt_priority = tsk->rt_priority; -- cgit v1.2.3-70-g09d2