From 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 23:12:58 -0400 Subject: tracing: adding function timings to function profiler If the function graph trace is enabled, the function profiler will use it to take the timing of the functions. cat /debug/tracing/trace_stat/functions Function Hit Time -------- --- ---- mwait_idle 127 183028.4 us schedule 26 151997.7 us __schedule 31 151975.1 us sys_wait4 2 74080.53 us do_wait 2 74077.80 us sys_newlstat 138 39929.16 us do_path_lookup 179 39845.79 us vfs_lstat_fd 138 39761.97 us user_path_at 153 39469.58 us path_walk 179 39435.76 us __link_path_walk 189 39143.73 us [...] Note the times are skewed due to the function graph tracer not taking into account schedules. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 67c6a21dd42..821bf49771d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -402,17 +402,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) return cnt; } -static void -trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - - s->buffer[len] = 0; - seq_puts(m, s->buffer); - - trace_seq_init(s); -} - /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer -- cgit v1.2.3-70-g09d2 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 21 ++++++++++++++++++++- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 8 ++++---- 5 files changed, 29 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf7..9e0a8d245e5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ed1fc5021d4..71e5faef12a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { struct ftrace_profile_stat *stat; + unsigned long long calltime; struct ftrace_profile *rec; unsigned long flags; @@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash) goto out; + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) - rec->time += trace->rettime - trace->calltime; + rec->time += calltime; + out: local_irq_restore(flags); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 821bf49771d..5d1a16cae37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +318,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c66ca3b6605..e3429a8ab05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 85bba0f018b..10f6ad7d85f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); -- cgit v1.2.3-70-g09d2 From 5452af664f6fba26b80eb2c8c4ceae2999d5cf56 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Mar 2009 00:25:38 +0100 Subject: tracing/ftrace: factorize the tracing files creation Impact: cleanup Most of the tracing files creation follow the same pattern: ret = debugfs_create_file(...) if (!ret) pr_warning("Couldn't create ... entry\n") Unify it! Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1238109938-11840-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 39 +++------ kernel/trace/ring_buffer.c | 7 +- kernel/trace/trace.c | 159 +++++++++++++++---------------------- kernel/trace/trace.h | 6 ++ kernel/trace/trace_event_profile.c | 1 - kernel/trace/trace_printk.c | 6 +- kernel/trace/trace_stack.c | 13 +-- kernel/trace/trace_sysprof.c | 6 +- 8 files changed, 86 insertions(+), 151 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 678e3d6caf8..6ea5a1ae6a9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2698,38 +2698,23 @@ static const struct file_operations ftrace_graph_fops = { static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("available_filter_functions", 0444, - d_tracer, NULL, &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_filter_functions' entry\n"); + trace_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); - entry = debugfs_create_file("failures", 0444, - d_tracer, NULL, &ftrace_failures_fops); - if (!entry) - pr_warning("Could not create debugfs 'failures' entry\n"); + trace_create_file("failures", 0444, + d_tracer, NULL, &ftrace_failures_fops); - entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_filter' entry\n"); + trace_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); - entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + trace_create_file("set_ftrace_notrace", 0644, d_tracer, NULL, &ftrace_notrace_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_notrace' entry\n"); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + trace_create_file("set_graph_function", 0444, d_tracer, NULL, &ftrace_graph_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ return 0; @@ -2987,7 +2972,6 @@ static const struct file_operations ftrace_pid_fops = { static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -2995,11 +2979,8 @@ static __init int ftrace_init_debugfs(void) ftrace_init_dyn_debugfs(d_tracer); - entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, - NULL, &ftrace_pid_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_pid' entry\n"); + trace_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); ftrace_profile_debugfs(d_tracer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 960cbf44c84..74a11808c28 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2845,14 +2845,11 @@ static const struct file_operations rb_simple_fops = { static __init int rb_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_on' entry\n"); + trace_create_file("tracing_on", 0644, d_tracer, + &ring_buffer_flags, &rb_simple_fops); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 32653c8c6e2..0615751a3ed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3581,7 +3581,7 @@ struct dentry *tracing_dentry_percpu(void) static void tracing_init_debugfs_percpu(long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(); - struct dentry *entry, *d_cpu; + struct dentry *d_cpu; /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ char cpu_dir[7]; @@ -3596,21 +3596,15 @@ static void tracing_init_debugfs_percpu(long cpu) } /* per cpu trace_pipe */ - entry = debugfs_create_file("trace_pipe", 0444, d_cpu, - (void *) cpu, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe' entry\n"); + trace_create_file("trace_pipe", 0444, d_cpu, + (void *) cpu, &tracing_pipe_fops); /* per cpu trace */ - entry = debugfs_create_file("trace", 0644, d_cpu, - (void *) cpu, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); + trace_create_file("trace", 0644, d_cpu, + (void *) cpu, &tracing_fops); - entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, - (void *) cpu, &tracing_buffers_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); + trace_create_file("trace_pipe_raw", 0444, d_cpu, + (void *) cpu, &tracing_buffers_fops); } #ifdef CONFIG_FTRACE_SELFTEST @@ -3766,6 +3760,22 @@ static const struct file_operations trace_options_core_fops = { .write = trace_options_core_write, }; +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + struct dentry *ret; + + ret = debugfs_create_file(name, mode, parent, data, fops); + if (!ret) + pr_warning("Could not create debugfs '%s' entry\n", name); + + return ret; +} + + static struct dentry *trace_options_init_dentry(void) { struct dentry *d_tracer; @@ -3793,7 +3803,6 @@ create_trace_option_file(struct trace_option_dentry *topt, struct tracer_opt *opt) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) @@ -3802,11 +3811,9 @@ create_trace_option_file(struct trace_option_dentry *topt, topt->flags = flags; topt->opt = opt; - entry = debugfs_create_file(opt->name, 0644, t_options, topt, + topt->entry = trace_create_file(opt->name, 0644, t_options, topt, &trace_options_fops); - topt->entry = entry; - } static struct trace_option_dentry * @@ -3861,123 +3868,81 @@ static struct dentry * create_trace_option_core_file(const char *option, long index) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) return NULL; - entry = debugfs_create_file(option, 0644, t_options, (void *)index, + return trace_create_file(option, 0644, t_options, (void *)index, &trace_options_core_fops); - - return entry; } static __init void create_trace_options_dir(void) { struct dentry *t_options; - struct dentry *entry; int i; t_options = trace_options_init_dentry(); if (!t_options) return; - for (i = 0; trace_options[i]; i++) { - entry = create_trace_option_core_file(trace_options[i], i); - if (!entry) - pr_warning("Could not create debugfs %s entry\n", - trace_options[i]); - } + for (i = 0; trace_options[i]; i++) + create_trace_option_core_file(trace_options[i], i); } static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; int cpu; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, - &global_trace, &tracing_ctrl_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + trace_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); - entry = debugfs_create_file("trace_options", 0644, d_tracer, - NULL, &tracing_iter_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_options' entry\n"); + trace_create_file("trace_options", 0644, d_tracer, + NULL, &tracing_iter_fops); - create_trace_options_dir(); + trace_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + + trace_create_file("trace", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); + + trace_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + + trace_create_file("current_tracer", 0444, d_tracer, + &global_trace, &set_tracer_fops); + + trace_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, &tracing_max_lat_fops); + + trace_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); - entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, - NULL, &tracing_cpumask_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - - entry = debugfs_create_file("trace", 0644, d_tracer, - (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); - - entry = debugfs_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - if (!entry) - pr_warning("Could not create debugfs 'available_tracers' entry\n"); - - entry = debugfs_create_file("current_tracer", 0444, d_tracer, - &global_trace, &set_tracer_fops); - if (!entry) - pr_warning("Could not create debugfs 'current_tracer' entry\n"); - - entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, - &tracing_max_latency, - &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_max_latency' entry\n"); - - entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, - &tracing_thresh, &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_thresh' entry\n"); - entry = debugfs_create_file("README", 0644, d_tracer, - NULL, &tracing_readme_fops); - if (!entry) - pr_warning("Could not create debugfs 'README' entry\n"); - - entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("README", 0644, d_tracer, + NULL, &tracing_readme_fops); + + trace_create_file("trace_pipe", 0444, d_tracer, (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_pipe' entry\n"); - - entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, - &global_trace, &tracing_entries_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'buffer_size_kb' entry\n"); - - entry = debugfs_create_file("trace_marker", 0220, d_tracer, - NULL, &tracing_mark_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_marker' entry\n"); + + trace_create_file("buffer_size_kb", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + + trace_create_file("trace_marker", 0220, d_tracer, + NULL, &tracing_mark_fops); #ifdef CONFIG_DYNAMIC_FTRACE - entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, - &ftrace_update_tot_cnt, - &tracing_dyn_info_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'dyn_ftrace_total_info' entry\n"); + trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + create_trace_options_dir(); + for_each_tracing_cpu(cpu) tracing_init_debugfs_percpu(cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 47aa6d0c97a..f76a8f8689d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -470,6 +470,12 @@ void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops); + struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 22cba997077..199de9c7422 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -28,4 +28,3 @@ void ftrace_profile_disable(int event_id) return event->profile_disable(event); } } - diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index eb81556107f..9bece9687b6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = { static __init int init_trace_printk_function_export(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) return 0; - entry = debugfs_create_file("printk_formats", 0444, d_tracer, + trace_create_file("printk_formats", 0444, d_tracer, NULL, &ftrace_formats_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'printk_formats' entry\n"); return 0; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c750f65f966..1796f00524e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace); static __init int stack_trace_init(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_max_size' entry\n"); + trace_create_file("stack_max_size", 0644, d_tracer, + &max_stack_size, &stack_max_size_fops); - entry = debugfs_create_file("stack_trace", 0444, d_tracer, - NULL, &stack_trace_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_trace' entry\n"); + trace_create_file("stack_trace", 0444, d_tracer, + NULL, &stack_trace_fops); if (stack_tracer_enabled) register_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 91fd19c2149..e04b76cc238 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = { void init_tracer_sysprof_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("sysprof_sample_period", 0644, + trace_create_file("sysprof_sample_period", 0644, d_tracer, NULL, &sysprof_sample_fops); - if (entry) - return; - pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); } -- cgit v1.2.3-70-g09d2 From e1112b4d96859367a93468027c9635e2ac04eb3f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 31 Mar 2009 00:48:49 -0500 Subject: tracing/filters: add run-time field descriptions to TRACE_EVENT_FORMAT events This patch adds run-time field descriptions to all the event formats exported using TRACE_EVENT_FORMAT. It also hooks up all the tracers that use them (i.e. the tracers in the 'ftrace subsystem') so they can also have their output filtered by the event-filtering mechanism. When I was testing this, there were a couple of things that fooled me into thinking the filters weren't working, when actually they were - I'll mention them here so others don't make the same mistakes (and file bug reports. ;-) One is that some of the tracers trace multiple events e.g. the sched_switch tracer uses the context_switch and wakeup events, and if you don't set filters on all of the traced events, the unfiltered output from the events without filters on them can make it look like the filtering as a whole isn't working properly, when actually it is doing what it was asked to do - it just wasn't asked to do the right thing. The other is that for the really high-volume tracers e.g. the function tracer, the volume of filtered events can be so high that it pushes the unfiltered events out of the ring buffer before they can be read so e.g. cat'ing the trace file repeatedly shows either no output, or once in awhile some output but that isn't there the next time you read the trace, which isn't what you normally expect when reading the trace file. If you read from the trace_pipe file though, you can catch them before they disappear. Changes from v1: As suggested by Frederic Weisbecker: - get rid of externs in functions - added unlikely() to filter_check_discard() Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/kmemtrace.c | 6 ++++ kernel/trace/trace.c | 25 ++++++++++++++++ kernel/trace/trace.h | 20 +++++++++++++ kernel/trace/trace_branch.c | 3 ++ kernel/trace/trace_event_types.h | 6 ++-- kernel/trace/trace_events.c | 7 +++++ kernel/trace/trace_events_filter.c | 4 +-- kernel/trace/trace_events_stage_2.h | 7 ----- kernel/trace/trace_export.c | 57 +++++++++++++++++++++++++++++++++++-- kernel/trace/trace_hw_branches.c | 2 ++ kernel/trace/trace_power.c | 4 +++ 11 files changed, 127 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 7a0aa0e260d..9419ad10541 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, gfp_t gfp_flags, int node) { + struct ftrace_event_call *call = &event_kmem_alloc; struct trace_array *tr = kmemtrace_array; struct kmemtrace_alloc_entry *entry; struct ring_buffer_event *event; @@ -62,6 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); @@ -71,6 +74,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, unsigned long call_site, const void *ptr) { + struct ftrace_event_call *call = &event_kmem_free; struct trace_array *tr = kmemtrace_array; struct kmemtrace_free_entry *entry; struct ring_buffer_event *event; @@ -86,6 +90,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4865459f609..962e6179994 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -898,6 +898,7 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_function; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -912,6 +913,9 @@ trace_function(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; + + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); } @@ -921,6 +925,7 @@ static int __trace_graph_entry(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; @@ -933,6 +938,7 @@ static int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(global_trace.buffer, event); return 1; @@ -943,6 +949,7 @@ static void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; @@ -955,6 +962,7 @@ static void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -973,6 +981,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, int skip, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -990,6 +999,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1015,6 +1025,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -1036,6 +1047,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1052,6 +1064,7 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { + struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; @@ -1064,6 +1077,7 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, pc); } @@ -1080,6 +1094,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_context_switch; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1095,6 +1110,9 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); + + filter_check_discard(call, entry, event); + trace_buffer_unlock_commit(tr, event, flags, pc); } @@ -1104,6 +1122,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1120,6 +1139,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); @@ -1221,6 +1242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1260,6 +1282,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); out_unlock: @@ -1279,6 +1302,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1314,6 +1338,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); out_unlock: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 34b94c3f40a..e7737281953 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -866,6 +866,21 @@ extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); +static inline void +filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer_event *event) +{ + if (unlikely(call->preds) && !filter_match_preds(call, rec)) + ring_buffer_event_discard(event); +} + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; @@ -897,4 +912,9 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ + extern struct ftrace_event_call event_##call; +#include "trace_event_types.h" + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index e6e32912ffb..c95c25d838e 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -30,6 +30,7 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { + struct ftrace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; @@ -73,6 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); out: diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index fd78bee71dd..95b147aac22 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned int, line, line) - TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) - TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) + TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, + TRACE_FUNC_SIZE+1, func) + TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, + TRACE_FUNC_SIZE+1, file) TRACE_FIELD(char, correct, correct) ), TP_RAW_FMT("%u:%s:%s (%u)") diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d278ff..be9299a53e2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -680,6 +680,7 @@ static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; + struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -708,6 +709,12 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->preds = NULL; + entry = debugfs_create_file("filter", 0644, system->entry, system, + &ftrace_subsystem_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", name); + return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 026be412f35..470ad9487ec 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -185,7 +185,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system) } events_for_each(call) { - if (!call->name || !call->regfunc) + if (!call->define_fields) continue; if (!strcmp(call->system, system->name)) @@ -324,7 +324,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, events_for_each(call) { int err; - if (!call->name || !call->regfunc) + if (!call->define_fields) continue; if (strcmp(call->system, system->name)) diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 30743f7d411..1c94b87c718 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -146,13 +146,6 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; -#define __common_field(type, item) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ - if (ret) \ - return ret; - #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 07a22c33ebf..f4e46616c48 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -30,7 +30,7 @@ #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ @@ -85,18 +85,69 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_ENTRY entry #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int ftrace_define_fields_##call(void); \ +static int ftrace_raw_init_event_##call(void); \ \ -static struct ftrace_event_call __used \ +struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ +}; \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + INIT_LIST_HEAD(&event_##call.fields); \ + return 0; \ +} \ + +#include "trace_event_types.h" + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_ZERO_CHAR +#define TRACE_FIELD_ZERO_CHAR(item) + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + struct args field; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ } + #include "trace_event_types.h" diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 7bfdf4c2347..e6b275b22ac 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -168,6 +168,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) void trace_hw_branch(u64 from, u64 to) { + struct ftrace_event_call *call = &event_hw_branch; struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; @@ -194,6 +195,7 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index bae791ebcc5..8ce7d7d62c0 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type, static void probe_power_end(struct power_trace *it) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -54,6 +55,7 @@ static void probe_power_end(struct power_trace *it) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); @@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it) static void probe_power_mark(struct power_trace *it, unsigned int type, unsigned int level) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -84,6 +87,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); -- cgit v1.2.3-70-g09d2 From e45f2e2bd298e1ff687448e5fd15a3588b5807ec Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 31 Mar 2009 00:49:16 -0500 Subject: tracing/filters: add TRACE_EVENT_FORMAT_NOFILTER event macro Frederic Weisbecker suggested that the trace_special event shouldn't be filterable; this patch adds a TRACE_EVENT_FORMAT_NOFILTER event macro that allows an event format to be exported without having a filter attached, and removes filtering from the trace_special event. Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 -- kernel/trace/trace.h | 2 ++ kernel/trace/trace_event_types.h | 2 +- kernel/trace/trace_export.c | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 962e6179994..c209d214169 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1064,7 +1064,6 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { - struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; @@ -1077,7 +1076,6 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, pc); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e7737281953..3cf856fa597 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -915,6 +915,8 @@ do { \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ extern struct ftrace_event_call event_##call; +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) #include "trace_event_types.h" #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 95b147aac22..cfcecc4fd86 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); -TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, +TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, arg1, arg1) TRACE_FIELD(unsigned long, arg2, arg2) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index f4e46616c48..77c494f5e1d 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -65,6 +65,22 @@ ftrace_format_##call(struct trace_seq *s) \ return ret; \ } +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct args field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ + return ret; \ +} + #include "trace_event_types.h" #undef TRACE_ZERO_CHAR @@ -109,6 +125,19 @@ static int ftrace_raw_init_event_##call(void) \ return 0; \ } \ +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ + \ +struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .id = proto, \ + .system = __stringify(TRACE_SYSTEM), \ + .show_format = ftrace_format_##call, \ +}; + #include "trace_event_types.h" #undef TRACE_FIELD @@ -150,4 +179,8 @@ ftrace_define_fields_##call(void) \ return ret; \ } +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) + #include "trace_event_types.h" -- cgit v1.2.3-70-g09d2 From 77d9f465d46fd67cdb82ee5e1ab99dd57a17c486 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Apr 2009 01:16:59 -0400 Subject: tracing/filters: use ring_buffer_discard_commit for discarded events The ring_buffer_discard_commit makes better usage of the ring_buffer when an event has been discarded. It tries to remove it completely if possible. This patch converts the trace event filtering to use ring_buffer_discard_commit instead of the ring_buffer_event_discard. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 9 +++++++-- kernel/trace/trace.h | 1 + kernel/trace/trace_events_stage_3.h | 6 +++--- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c209d214169..d880ab2772c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -884,13 +884,18 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); +} + +void trace_current_buffer_discard_commit(struct ring_buffer_event *event) +{ + ring_buffer_discard_commit(global_trace.buffer, event); } void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3cf856fa597..dfefffd7ae3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -497,6 +497,7 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 9d2fa78cecc..d2f34bf30e5 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -223,9 +223,9 @@ static void ftrace_raw_event_##call(proto) \ assign; \ \ if (call->preds && !filter_match_preds(call, entry)) \ - ring_buffer_event_discard(event); \ - \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ + trace_current_buffer_discard_commit(event); \ + else \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ \ } \ \ -- cgit v1.2.3-70-g09d2 From eb02ce017dd83985041a7e54c6449f92d53b026f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 8 Apr 2009 03:15:54 -0500 Subject: tracing/filters: use ring_buffer_discard_commit() in filter_check_discard() This patch changes filter_check_discard() to make use of the new ring_buffer_discard_commit() function and modifies the current users to call the old commit function in the non-discard case. It also introduces a version of filter_check_discard() that uses the global trace buffer (filter_current_check_discard()) for those cases. v2 changes: - fix compile error noticed by Ingo Molnar Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <1239178554.10295.36.camel@tropicana> Signed-off-by: Ingo Molnar --- kernel/trace/kmemtrace.c | 10 ++++----- kernel/trace/trace.c | 45 ++++++++++++++++++++----------------- kernel/trace/trace.h | 14 +++++++++--- kernel/trace/trace_branch.c | 5 ++--- kernel/trace/trace_events_stage_3.h | 5 +---- kernel/trace/trace_hw_branches.c | 4 ++-- kernel/trace/trace_power.c | 8 +++---- 7 files changed, 48 insertions(+), 43 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 9419ad10541..86cdf671d7e 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -63,9 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } @@ -90,9 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d880ab2772c..c0047fcf707 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -171,6 +171,12 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +int filter_current_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer_event *event) +{ + return filter_check_discard(call, rec, global_trace.buffer, event); +} + cycle_t ftrace_now(int cpu) { u64 ts; @@ -919,9 +925,8 @@ trace_function(struct trace_array *tr, entry->ip = ip; entry->parent_ip = parent_ip; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -943,8 +948,8 @@ static int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); return 1; } @@ -967,8 +972,8 @@ static void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -1004,8 +1009,8 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1052,8 +1057,8 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1114,9 +1119,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); - filter_check_discard(call, entry, event); - - trace_buffer_unlock_commit(tr, event, flags, pc); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1142,9 +1146,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); } @@ -1285,8 +1288,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); @@ -1341,8 +1344,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dfefffd7ae3..9729d14767d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -866,13 +866,21 @@ extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); -static inline void +static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->preds) && !filter_match_preds(call, rec)) - ring_buffer_event_discard(event); + if (unlikely(call->preds) && !filter_match_preds(call, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; } #define __common_field(type, item) \ diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index c95c25d838e..8e64e604f5a 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -74,9 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index d2f34bf30e5..b2b298269eb 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -222,11 +222,8 @@ static void ftrace_raw_event_##call(proto) \ \ assign; \ \ - if (call->preds && !filter_match_preds(call, entry)) \ - trace_current_buffer_discard_commit(event); \ - else \ + if (!filter_current_check_discard(call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ - \ } \ \ static int ftrace_raw_reg_event_##call(void) \ diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index e6b275b22ac..8683d50a753 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -195,8 +195,8 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index 8ce7d7d62c0..810a5b7cf1c 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -55,8 +55,8 @@ static void probe_power_end(struct power_trace *it) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -87,8 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } -- cgit v1.2.3-70-g09d2 From 17c873ec280a03894bc718af817f7f24fa787ae1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 18:12:50 -0400 Subject: tracing/events: add export symbols for trace events in modules Impact: let modules add trace events The trace event code requires some functions to be exported to allow modules to use TRACE_EVENT. This patch adds EXPORT_SYMBOL_GPL to the necessary functions. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 +++ kernel/trace/trace_events.c | 1 + kernel/trace/trace_events_filter.c | 2 ++ kernel/trace/trace_output.c | 3 +++ 4 files changed, 9 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c0047fcf707..2d69b26b3cc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,6 +176,7 @@ int filter_current_check_discard(struct ftrace_event_call *call, void *rec, { return filter_check_discard(call, rec, global_trace.buffer, event); } +EXPORT_SYMBOL_GPL(filter_current_check_discard); cycle_t ftrace_now(int cpu) { @@ -886,6 +887,7 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, return trace_buffer_lock_reserve(&global_trace, type, len, flags, pc); } +EXPORT_SYMBOL(trace_current_buffer_lock_reserve); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) @@ -903,6 +905,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event) { ring_buffer_discard_commit(global_trace.buffer, event); } +EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); void trace_function(struct trace_array *tr, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5c66aaff07c..8b9e621b80b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -53,6 +53,7 @@ err: return -ENOMEM; } +EXPORT_SYMBOL_GPL(trace_define_field); static void ftrace_clear_events(void) { diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index d30b06b02b4..f8e5eab0424 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -110,6 +110,7 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) return 1; } +EXPORT_SYMBOL_GPL(filter_match_preds); void filter_print_preds(struct filter_pred **preds, int n_preds, struct trace_seq *s) @@ -220,6 +221,7 @@ oom: return -ENOMEM; } +EXPORT_SYMBOL_GPL(init_preds); void filter_free_subsystem_preds(struct event_subsystem *system) { diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0e70fb07ca7..83a8abb9640 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -94,6 +94,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +EXPORT_SYMBOL_GPL(trace_seq_printf); int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { @@ -538,6 +539,7 @@ int register_ftrace_event(struct trace_event *event) return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_event); /** * unregister_ftrace_event - remove a no longer used event @@ -551,6 +553,7 @@ int unregister_ftrace_event(struct trace_event *event) return 0; } +EXPORT_SYMBOL_GPL(unregister_ftrace_event); /* * Standard events -- cgit v1.2.3-70-g09d2 From 69abe6a5d18a9394baa325bab8f57748b037c517 Mon Sep 17 00:00:00 2001 From: Avadh Patel Date: Fri, 10 Apr 2009 16:04:48 -0400 Subject: tracing: add saved_cmdlines file to show cached task comms Export the cached task comms to userspace. This allows user apps to translate the pids from a trace into their respective task command lines. [ Impact: let userspace apps reading binary buffer know comm's of pids ] Signed-off-by: Avadh Patel [ added error checking and use of buf pointer to index file_buf ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2d69b26b3cc..031c46f11bb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2421,6 +2421,56 @@ static const struct file_operations tracing_readme_fops = { .read = tracing_readme_read, }; +static ssize_t +tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf_comm; + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); + if (!buf_comm) { + kfree(file_buf); + return -ENOMEM; + } + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES; i++) { + int r; + + pid = map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + trace_find_cmdline(pid, buf_comm); + r = sprintf(buf, "%d %s\n", pid, buf_comm); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + kfree(buf_comm); + + return len; +} + +static const struct file_operations tracing_saved_cmdlines_fops = { + .open = tracing_open_generic, + .read = tracing_saved_cmdlines_read, +}; + static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -3973,6 +4023,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); + trace_create_file("saved_cmdlines", 0444, d_tracer, + NULL, &tracing_saved_cmdlines_fops); + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); -- cgit v1.2.3-70-g09d2 From 339ae5d3c3fc2025e3657637921495fd600027c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 17 Apr 2009 10:34:30 +0800 Subject: tracing: fix file mode of trace and README trace is read-write and README is read-only. [ Impact: fix /debug/tracing/ file permissions. ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <49E7EAB6.4070605@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 031c46f11bb..f681f646aa0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4002,7 +4002,7 @@ static __init int tracer_init_debugfs(void) trace_create_file("available_tracers", 0444, d_tracer, &global_trace, &show_traces_fops); - trace_create_file("current_tracer", 0444, d_tracer, + trace_create_file("current_tracer", 0644, d_tracer, &global_trace, &set_tracer_fops); trace_create_file("tracing_max_latency", 0644, d_tracer, @@ -4011,7 +4011,7 @@ static __init int tracer_init_debugfs(void) trace_create_file("tracing_thresh", 0644, d_tracer, &tracing_thresh, &tracing_max_lat_fops); - trace_create_file("README", 0644, d_tracer, + trace_create_file("README", 0444, d_tracer, NULL, &tracing_readme_fops); trace_create_file("trace_pipe", 0444, d_tracer, -- cgit v1.2.3-70-g09d2 From 12acd473d45cf2e40de3782cb2de712e5cd4d715 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 16:01:56 -0400 Subject: tracing: add EXPORT_SYMBOL_GPL for trace commits Not all the necessary symbols were exported to allow for tracing by modules. This patch adds them in. [ Impact: allow modules to commit data to the ring buffer ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f681f646aa0..183d788038e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -894,18 +894,20 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } +EXPORT_SYMBOL(trace_current_buffer_unlock_commit); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); } +EXPORT_SYMBOL(trace_nowake_buffer_unlock_commit); void trace_current_buffer_discard_commit(struct ring_buffer_event *event) { ring_buffer_discard_commit(global_trace.buffer, event); } -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); +EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); void trace_function(struct trace_array *tr, -- cgit v1.2.3-70-g09d2 From 3189cdb31622f4e40688ce5a6fc5d940b42bc805 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 16:13:55 -0400 Subject: tracing: protect trace_printk from recursion trace_printk can be called from any context, including NMIs. If this happens, then we must test for for recursion before grabbing any spinlocks. This patch prevents trace_printk from being called recursively. [ Impact: prevent hard lockup in lockdep event tracer ] Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 183d788038e..b9a3adce922 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1259,6 +1259,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_array_cpu *data; struct bprint_entry *entry; unsigned long flags; + int disable; int resched; int cpu, len = 0, size, pc; @@ -1273,7 +1274,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; /* Lockdep uses trace_printk for lock tracing */ @@ -1301,6 +1303,7 @@ out_unlock: local_irq_restore(flags); out: + atomic_dec_return(&data->disabled); ftrace_preempt_enable(resched); unpause_graph_tracing(); @@ -1320,6 +1323,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) int cpu, len = 0, size, pc; struct print_entry *entry; unsigned long irq_flags; + int disable; if (tracing_disabled || tracing_selftest_running) return 0; @@ -1329,7 +1333,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; pause_graph_tracing(); @@ -1357,6 +1362,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) raw_local_irq_restore(irq_flags); unpause_graph_tracing(); out: + atomic_dec_return(&data->disabled); preempt_enable_notrace(); return len; -- cgit v1.2.3-70-g09d2 From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Apr 2009 16:53:34 +0800 Subject: tracing/events: make struct trace_entry->type to be int type struct trace_entry->type is unsigned char, while trace event's id is int type, thus for a event with id >= 256, it's entry->type is cast to (id % 256), and then we can't see the trace output of this event. # insmod trace-events-sample.ko # echo foo_bar > /mnt/tracing/set_event # cat /debug/tracing/events/trace-events-sample/foo_bar/id 256 # cat /mnt/tracing/trace_pipe <...>-3548 [001] 215.091142: Unknown type 0 <...>-3548 [001] 216.089207: Unknown type 0 <...>-3548 [001] 217.087271: Unknown type 0 <...>-3548 [001] 218.085332: Unknown type 0 [ Impact: fix output for trace events with id >= 256 ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 ++-- include/trace/ftrace.h | 2 +- kernel/trace/trace.c | 4 ++-- kernel/trace/trace.h | 2 +- kernel/trace/trace_events.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 75f3ac01a87..2a4a4074991 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,7 +16,7 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - unsigned char type; + int type; unsigned char flags; unsigned char preempt_count; int pid; @@ -73,7 +73,7 @@ enum print_line_t { struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 39a3351f2e7..15ef08d9add 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -198,7 +198,7 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(unsigned char, type); \ + __common_field(int, type); \ __common_field(unsigned char, flags); \ __common_field(unsigned char, preempt_count); \ __common_field(int, pid); \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b9a3adce922..b6183bc9eca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,7 +838,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc) { @@ -881,7 +881,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr, } struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc) { return trace_buffer_lock_reserve(&global_trace, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 247948e81b0..7d55bcf50e4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -422,7 +422,7 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9ea55a7dfde..5d6e879cf87 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(unsigned char, type), + FIELD(int, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), -- cgit v1.2.3-70-g09d2 From cd891ae0305601bdb4d2e7e85282961c4ff256cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Apr 2009 11:39:34 -0400 Subject: tracing: convert ftrace_dump spinlocks to raw ftrace_dump is used for printing out the contents of the ftrace ring buffer to the console on failure. Currently it uses a spinlock to synchronize the output from multiple failures on different CPUs. This spin lock currently is a normal spinlock and can cause issues with lockdep and lock tracing. This patch converts it to raw since it is for error handling only. The lock is local to the ftrace_dump and is not used by any other infrastructure. [ Impact: prevent ftrace_dump from locking up by internal tracing ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b6183bc9eca..5d704a41f83 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4114,7 +4114,8 @@ trace_printk_seq(struct trace_seq *s) static void __ftrace_dump(bool disable_tracing) { - static DEFINE_SPINLOCK(ftrace_dump_lock); + static raw_spinlock_t ftrace_dump_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; @@ -4123,7 +4124,8 @@ static void __ftrace_dump(bool disable_tracing) int cnt = 0, cpu; /* only one dump */ - spin_lock_irqsave(&ftrace_dump_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&ftrace_dump_lock); if (dump_ran) goto out; @@ -4195,7 +4197,8 @@ static void __ftrace_dump(bool disable_tracing) } out: - spin_unlock_irqrestore(&ftrace_dump_lock, flags); + __raw_spin_unlock(&ftrace_dump_lock); + local_irq_restore(flags); } /* By default: disable tracing after the dump */ -- cgit v1.2.3-70-g09d2 From 5beae6efd1004b44c3e257dc96087978e4c763c1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:16:21 -0400 Subject: tracing: fix ref count in splice pages The pages allocated for the splice binary buffer did not initialize the ref count correctly. This caused pages not to be freed and causes a drastic memory leak. Thanks to logdev I was able to trace the tracer to find where the leak was. [ Impact: stop memory leak when using splice ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5d704a41f83..9058240c85c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3531,6 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if (!ref) break; + ref->ref = 1; ref->buffer = info->tr->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer); if (!ref->page) { -- cgit v1.2.3-70-g09d2 From 93459c6cb9816c52200993d29dd18cea1daee335 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:23:13 -0400 Subject: tracing: only add splice page if entries exist The splice code allocates a page even when the ring buffer is empty. It detects the ring buffer being empty when it it fails to copy anything from the ring buffer into the page. This patch adds a check to see if there is anything in the ring buffer before allocating a page. Thanks to logdev for letting me trace the tracer to find this. [ Impact: speed up due to removing unnecessary allocation ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9058240c85c..0aeb3b93414 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3508,7 +3508,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int size, i; + int entries, size, i; size_t ret; if (*ppos & (PAGE_SIZE - 1)) { @@ -3523,7 +3523,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); + + for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3564,6 +3566,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; *ppos += PAGE_SIZE; + + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); } spd.nr_pages = i; -- cgit v1.2.3-70-g09d2 From f2957f1f196b0217644a17c1379855a118a37d72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:26:30 -0400 Subject: tracing: have splice only copy full pages Splice works with pages, it is much more effecient to use an entire page than to copy bits over several pages. Using logdev to trace the internals of the splice mechanism, I was able to see that splice can be very aggressive. When tracing is occurring, and the reader caught up to the writer, and the writer is on the reader page, the reader will copy what is there into the splice page. Splice may iterate over several pages and if the writer is still writing to the page, the reader will keep copying bits to new pages to pass to userspace. This patch changes it to only pass data to userspace if the page is full (the writer has left the page). This has a small side effect that splice can not read a partial page, and must wait for the page to fill. This should not be an issue. If tracing has stopped, then a use of "read" will still read all of the page. [ Impact: better performance for ring buffer splice code ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0aeb3b93414..f5427e0fc98 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3542,7 +3542,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } r = ring_buffer_read_page(ref->buffer, &ref->page, - len, info->cpu, 0); + len, info->cpu, 1); if (r < 0) { ring_buffer_free_read_page(ref->buffer, ref->page); -- cgit v1.2.3-70-g09d2 From c8d771835e18c938dae8690611d65fe98ad30f58 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 18:03:45 -0400 Subject: tracing: export stats of ring buffers to userspace This patch adds stats to the ftrace ring buffers: # cat /debugfs/tracing/per_cpu/cpu0/stats entries: 42360 overrun: 30509326 commit overrun: 0 nmi dropped: 0 Where entries are the total number of data entries in the buffer. overrun is the number of entries not consumed and were overwritten by the writer. commit overrun is the number of entries dropped due to nested writers wrapping the buffer before the initial writer finished the commit. nmi dropped is the number of entries dropped due to the ring buffer lock being held when an nmi was going to write to the ring buffer. Note, this field will be meaningless and will go away when the ring buffer becomes lockless. [ Impact: let userspace know what is happening in the ring buffers ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f5427e0fc98..74df029056b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3595,6 +3595,45 @@ static const struct file_operations tracing_buffers_fops = { .llseek = no_llseek, }; +static ssize_t +tracing_stats_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + unsigned long cpu = (unsigned long)filp->private_data; + struct trace_array *tr = &global_trace; + struct trace_seq *s; + unsigned long cnt; + + s = kmalloc(sizeof(*s), GFP_ATOMIC); + if (!s) + return ENOMEM; + + trace_seq_init(s); + + cnt = ring_buffer_entries_cpu(tr->buffer, cpu); + trace_seq_printf(s, "entries: %ld\n", cnt); + + cnt = ring_buffer_overrun_cpu(tr->buffer, cpu); + trace_seq_printf(s, "overrun: %ld\n", cnt); + + cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); + trace_seq_printf(s, "commit overrun: %ld\n", cnt); + + cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu); + trace_seq_printf(s, "nmi dropped: %ld\n", cnt); + + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); + + kfree(s); + + return count; +} + +static const struct file_operations tracing_stats_fops = { + .open = tracing_open_generic, + .read = tracing_stats_read, +}; + #ifdef CONFIG_DYNAMIC_FTRACE int __weak ftrace_arch_read_dyn_info(char *buf, int size) @@ -3708,6 +3747,9 @@ static void tracing_init_debugfs_percpu(long cpu) trace_create_file("trace_pipe_raw", 0444, d_cpu, (void *) cpu, &tracing_buffers_fops); + + trace_create_file("stats", 0444, d_cpu, + (void *) cpu, &tracing_stats_fops); } #ifdef CONFIG_FTRACE_SELFTEST -- cgit v1.2.3-70-g09d2 From 94487d6d53af5acae10cf9fd52f74498994d46b1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 May 2009 19:22:53 -0400 Subject: tracing: use proper export symbol for tracing api When adding the EXPORT_SYMBOL to some of the tracing API, I accidently used EXPORT_SYMBOL instead of EXPORT_SYMBOL_GPL. This patch fixes that mistake. [ Impact: export the tracing code only for GPL modules ] Reported-by: Christoph Hellwig Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 74df029056b..4164a344e72 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -887,21 +887,21 @@ trace_current_buffer_lock_reserve(int type, unsigned long len, return trace_buffer_lock_reserve(&global_trace, type, len, flags, pc); } -EXPORT_SYMBOL(trace_current_buffer_lock_reserve); +EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } -EXPORT_SYMBOL(trace_current_buffer_unlock_commit); +EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); } -EXPORT_SYMBOL(trace_nowake_buffer_unlock_commit); +EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); void trace_current_buffer_discard_commit(struct ring_buffer_event *event) { -- cgit v1.2.3-70-g09d2 From 9456f0fa6d3cb944d3b9fc31c9a244e0362c26ea Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 May 2009 21:54:09 -0400 Subject: tracing: reset ring buffer when removing modules with events Li Zefan found that there's a race using the event ids of events and modules. When a module is loaded, an event id is incremented. We only have 16 bits for event ids (65536) and there is a possible (but highly unlikely) race that we could load and unload a module that registers events so many times that the event id counter overflows. When it overflows, it then restarts and goes looking for available ids. An id is available if it was added by a module and released. The race is if you have one module add an id, and then is removed. Another module loaded can use that same event id. But if the old module still had events in the ring buffer, the new module's call back would get bogus data. At best (and most likely) the output would just be garbage. But if the module for some reason used pointers (not recommended) then this could potentially crash. The safest thing to do is just reset the ring buffer if a module that registered events is removed. [ Impact: prevent unpredictable results of event id overflows ] Reported-by: Li Zefan LKML-Reference: <49FEAFD0.30106@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 10 ++++++++++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_events.c | 9 +++++++++ 3 files changed, 21 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4164a344e72..dd40d232034 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -639,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr) tracing_reset(tr, cpu); } +void tracing_reset_current(int cpu) +{ + tracing_reset(&global_trace, cpu); +} + +void tracing_reset_current_online_cpus(void) +{ + tracing_reset_online_cpus(&global_trace); +} + #define SAVED_CMDLINES 128 #define NO_CMDLINE_MAP UINT_MAX static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 777c6c3a0cd..ba25793ffe6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -409,6 +409,8 @@ int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); +void tracing_reset_current(int cpu); +void tracing_reset_current_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *trace_create_file(const char *name, mode_t mode, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8d579ff2361..6d2c842a024 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -932,9 +932,11 @@ static void trace_module_remove_events(struct module *mod) { struct ftrace_module_file_ops *file_ops; struct ftrace_event_call *call, *p; + bool found = false; list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) { + found = true; if (call->enabled) { call->enabled = 0; call->unregfunc(); @@ -957,6 +959,13 @@ static void trace_module_remove_events(struct module *mod) list_del(&file_ops->list); kfree(file_ops); } + + /* + * It is safest to reset the ring buffer if the module being unloaded + * registered any events. + */ + if (found) + tracing_reset_current_online_cpus(); } static int trace_module_notify(struct notifier_block *self, -- cgit v1.2.3-70-g09d2 From 4f5359685af6de7dca101393dc606620adbe963f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 18 May 2009 19:35:34 +0800 Subject: tracing: add trace_event_read_lock() I found that there is nothing to protect event_hash in ftrace_find_event(). Rcu protects the event hashlist but not the event itself while we use it after its extraction through ftrace_find_event(). This lack of a proper locking in this spot opens a race window between any event dereferencing and module removal. Eg: --Task A-- print_trace_line(trace) { event = find_ftrace_event(trace) --Task B-- trace_module_remove_events(mod) { list_trace_events_module(ev, mod) { unregister_ftrace_event(ev->event) { hlist_del(ev->event->node) list_del(....) } } } |--> module removed, the event has been dropped --Task A-- event->print(trace); // Dereferencing freed memory If the event retrieved belongs to a module and this module is concurrently removed, we may end up dereferencing a data from a freed module. RCU could solve this, but it would add latency to the kernel and forbid tracers output callbacks to call any sleepable code. So this fix converts 'trace_event_mutex' to a read/write semaphore, and adds trace_event_read_lock() to protect ftrace_find_event(). [ Impact: fix possible freed memory dereference in ftrace ] Signed-off-by: Lai Jiangshan Acked-by: Steven Rostedt LKML-Reference: <4A114806.7090302@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 8 ++++++++ kernel/trace/trace_output.c | 25 ++++++++++++++++++------- kernel/trace/trace_output.h | 2 ++ 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dd40d232034..02d32baa23a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1569,12 +1569,14 @@ static void *s_start(struct seq_file *m, loff_t *pos) p = s_next(m, p, &l); } + trace_event_read_lock(); return p; } static void s_stop(struct seq_file *m, void *p) { atomic_dec(&trace_record_cmdline_disabled); + trace_event_read_unlock(); } static void print_lat_help_header(struct seq_file *m) @@ -1817,6 +1819,7 @@ static int trace_empty(struct trace_iterator *iter) return 1; } +/* Called with trace_event_read_lock() held. */ static enum print_line_t print_trace_line(struct trace_iterator *iter) { enum print_line_t ret; @@ -3008,6 +3011,7 @@ waitagain: offsetof(struct trace_iterator, seq)); iter->pos = -1; + trace_event_read_lock(); while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -3024,6 +3028,7 @@ waitagain: if (iter->seq.len >= cnt) break; } + trace_event_read_unlock(); /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); @@ -3146,6 +3151,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, goto out_err; } + trace_event_read_lock(); + /* Fill as many pages as possible. */ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -3168,6 +3175,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_init(&iter->seq); } + trace_event_read_unlock(); mutex_unlock(&iter->mutex); spd.nr_pages = i; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 489c0e8ada0..7136420603a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -14,7 +14,7 @@ /* must be a power of 2 */ #define EVENT_HASHSIZE 128 -static DEFINE_MUTEX(trace_event_mutex); +static DECLARE_RWSEM(trace_event_mutex); static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; @@ -466,6 +466,7 @@ static int task_state_char(unsigned long state) * @type: the type of event to look for * * Returns an event of type @type otherwise NULL + * Called with trace_event_read_lock() held. */ struct trace_event *ftrace_find_event(int type) { @@ -475,7 +476,7 @@ struct trace_event *ftrace_find_event(int type) key = type & (EVENT_HASHSIZE - 1); - hlist_for_each_entry_rcu(event, n, &event_hash[key], node) { + hlist_for_each_entry(event, n, &event_hash[key], node) { if (event->type == type) return event; } @@ -513,6 +514,16 @@ static int trace_search_list(struct list_head **list) return last + 1; } +void trace_event_read_lock(void) +{ + down_read(&trace_event_mutex); +} + +void trace_event_read_unlock(void) +{ + up_read(&trace_event_mutex); +} + /** * register_ftrace_event - register output for an event type * @event: the event type to register @@ -533,7 +544,7 @@ int register_ftrace_event(struct trace_event *event) unsigned key; int ret = 0; - mutex_lock(&trace_event_mutex); + down_write(&trace_event_mutex); if (WARN_ON(!event)) goto out; @@ -581,11 +592,11 @@ int register_ftrace_event(struct trace_event *event) key = event->type & (EVENT_HASHSIZE - 1); - hlist_add_head_rcu(&event->node, &event_hash[key]); + hlist_add_head(&event->node, &event_hash[key]); ret = event->type; out: - mutex_unlock(&trace_event_mutex); + up_write(&trace_event_mutex); return ret; } @@ -597,10 +608,10 @@ EXPORT_SYMBOL_GPL(register_ftrace_event); */ int unregister_ftrace_event(struct trace_event *event) { - mutex_lock(&trace_event_mutex); + down_write(&trace_event_mutex); hlist_del(&event->node); list_del(&event->list); - mutex_unlock(&trace_event_mutex); + up_write(&trace_event_mutex); return 0; } diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 6e220a8e570..ac240e76eb0 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -20,6 +20,8 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); +extern void trace_event_read_lock(void); +extern void trace_event_read_unlock(void); extern struct trace_event *ftrace_find_event(int type); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, -- cgit v1.2.3-70-g09d2 From 5b6045a906f48d37591365c5dcdd6d1d146bfd4a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 26 May 2009 17:28:02 +0200 Subject: trace: disable preemption before taking raw spinlocks s390 code uses smp_processor_id() in __raw_spin_lock() code which reveals that a (raw) spinlock is taken without preemption disabled. This can potentially deadlock. To fix this explicitly disable and enable preemption. BUG: using smp_processor_id() in preemptible [00000000] code: cat/2278 caller is trace_find_cmdline+0x40/0xfc CPU: 0 Not tainted 2.6.30-rc7-dirty #39 Process cat (pid: 2278, task: 000000003faedb68, ksp: 000000003b33b988) 000000003b33b988 000000003b33bae0 0000000000000002 0000000000000000 000000003b33bb80 000000003b33baf8 000000003b33baf8 00000000000175d6 0000000000000001 000000003b33b988 000000003f9b0000 000000000000000b 000000000000000c 000000003b33bb40 000000003b33bae0 0000000000000000 0000000000000000 00000000000175d6 000000003b33bae0 000000003b33bb28 Call Trace: ([<00000000000174b2>] show_trace+0x112/0x170) [<0000000000017582>] show_stack+0x72/0x100 [<0000000000441538>] dump_stack+0xc8/0xd8 [<000000000025c350>] debug_smp_processor_id+0x114/0x130 [<00000000000bf0e4>] trace_find_cmdline+0x40/0xfc [<00000000000c35d4>] trace_print_context+0x58/0xac [<00000000000bb676>] print_trace_line+0x416/0x470 [<00000000000bc8fe>] s_show+0x4e/0x428 [<000000000013834e>] seq_read+0x36a/0x5d4 [<0000000000112a78>] vfs_read+0xc8/0x174 [<0000000000112c58>] SyS_read+0x74/0xc4 [<000000000002c7ae>] sysc_noemu+0x10/0x16 [<000002000012436c>] 0x2000012436c 1 lock held by cat/2278: #0: (&p->lock){+.+.+.}, at: [<0000000000138056>] seq_read+0x72/0x5d4 [ Impact: fix preempt-unsafe raw spinlock ] Signed-off-by: Heiko Carstens Acked-by: Steven Rostedt Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 02d32baa23a..a3a8a87d7e9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -808,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[]) return; } + preempt_disable(); __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) @@ -816,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[]) strcpy(comm, "<...>"); __raw_spin_unlock(&trace_cmdline_lock); + preempt_enable(); } void tracing_record_cmdline(struct task_struct *tsk) -- cgit v1.2.3-70-g09d2 From 112f38a7e36e9d688b389507136bf3af3e6d159b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 15:16:05 -0400 Subject: tracing: make trace pipe recognize latency format flag The trace_pipe did not recognize the latency format flag and would produce different output than the trace file. The problem was partly due that the trace flags in the iterator was not set as well as the trace_pipe zeros out part of the iterator (including the flags) to be able to use the same routines as the trace file. trace_flags of the iterator should not cause any problems when not zeroed out by for trace_pipe. Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- kernel/trace/trace.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bbf40f624fc..5c093ffc655 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -51,6 +51,7 @@ struct trace_iterator { int cpu_file; struct mutex mutex; struct ring_buffer_iter *buffer_iter[NR_CPUS]; + unsigned long iter_flags; /* The below is zeroed out in pipe_read */ struct trace_seq seq; @@ -58,7 +59,6 @@ struct trace_iterator { int cpu; u64 ts; - unsigned long iter_flags; loff_t pos; long idx; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3a8a87d7e9..cae34c69752 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2826,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); + if (trace_flags & TRACE_ITER_LATENCY_FMT) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); -- cgit v1.2.3-70-g09d2 From 215368e8e59023d6a0abdda896923018d74fdf7f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 15 Jun 2009 10:56:42 +0800 Subject: tracing: fix a typo in tracing_cpumask_write() It's tracing_cpumask_new that should be kfree()ed. This causes tracing_cpumask to be freed due to the typo: # echo z > tracing_cpumask bash: echo: write error: Invalid argument And subsequent reads/writes to tracing_cpuamsk will access this already-freed tracing_cpumask, thus may lead to crash. [ Impact: fix leak and crash when writing invalid val to tracing_cpumask ] Acked-by: Frederic Weisbecker Signed-off-by: Li Zefan LKML-Reference: <4A35B86A.7070608@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8acd9b81a5d..7355a38e18b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2191,11 +2191,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; - mutex_lock(&tracing_cpumask_update_lock); err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); if (err) goto err_unlock; + mutex_lock(&tracing_cpumask_update_lock); + local_irq_disable(); __raw_spin_lock(&ftrace_max_lock); for_each_tracing_cpu(cpu) { @@ -2223,8 +2224,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, return count; err_unlock: - mutex_unlock(&tracing_cpumask_update_lock); - free_cpumask_var(tracing_cpumask); + free_cpumask_var(tracing_cpumask_new); return err; } -- cgit v1.2.3-70-g09d2 From e4f2d10f479d18198ebafcb5e124cc3dd8b8817a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 15 Jun 2009 10:57:28 +0800 Subject: tracing: replace a GFP_ATOMIC with GFP_KERNEL allocation Atomic allocation is not needed here. [ Impact: clean up of memory alloction type ] Acked-by: Frederic Weisbecker Signed-off-by: Li Zefan LKML-Reference: <4A35B898.2050607@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7355a38e18b..0f57f1b443b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3627,7 +3627,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, struct trace_seq *s; unsigned long cnt; - s = kmalloc(sizeof(*s), GFP_ATOMIC); + s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return ENOMEM; -- cgit v1.2.3-70-g09d2 From 156f5a7801195fa2ce44aeeb62d6cf8468f3332a Mon Sep 17 00:00:00 2001 From: GeunSik Lim Date: Tue, 2 Jun 2009 15:01:37 +0900 Subject: debugfs: Fix terminology inconsistency of dir name to mount debugfs filesystem. Many developers use "/debug/" or "/debugfs/" or "/sys/kernel/debug/" directory name to mount debugfs filesystem for ftrace according to ./Documentation/tracers/ftrace.txt file. And, three directory names(ex:/debug/, /debugfs/, /sys/kernel/debug/) is existed in kernel source like ftrace, DRM, Wireless, Documentation, Network[sky2]files to mount debugfs filesystem. debugfs means debug filesystem for debugging easy to use by greg kroah hartman. "/sys/kernel/debug/" name is suitable as directory name of debugfs filesystem. - debugfs related reference: http://lwn.net/Articles/334546/ Fix inconsistency of directory name to mount debugfs filesystem. * From Steven Rostedt - find_debugfs() and tracing_files() in this patch. Signed-off-by: GeunSik Lim Acked-by : Inaky Perez-Gonzalez Reviewed-by : Steven Rostedt Reviewed-by : James Smart CC: Jiri Kosina CC: David Airlie CC: Peter Osterlund CC: Ananth N Mavinakayanahalli CC: Anil S Keshavamurthy CC: Masami Hiramatsu Signed-off-by: Greg Kroah-Hartman --- Documentation/DocBook/debugobjects.tmpl | 2 +- Documentation/cdrom/packet-writing.txt | 2 +- Documentation/fault-injection/fault-injection.txt | 70 +++---- Documentation/kprobes.txt | 6 +- Documentation/trace/ftrace.txt | 233 +++++++++++++--------- Documentation/trace/mmiotrace.txt | 26 +-- drivers/block/pktcdvd.c | 2 +- drivers/gpu/drm/drm_debugfs.c | 12 +- drivers/gpu/drm/drm_drv.c | 2 +- drivers/gpu/drm/drm_stub.c | 2 +- drivers/net/Kconfig | 4 +- drivers/net/wimax/i2400m/i2400m.h | 2 +- drivers/net/wireless/ath/ath5k/Kconfig | 5 +- drivers/net/wireless/libertas/README | 12 +- drivers/scsi/lpfc/lpfc_debugfs.c | 3 +- include/linux/kernel.h | 2 +- include/linux/tracepoint.h | 4 +- kernel/trace/Kconfig | 10 +- kernel/trace/trace.c | 23 +-- scripts/tracing/draw_functrace.py | 7 +- 20 files changed, 238 insertions(+), 191 deletions(-) (limited to 'kernel/trace/trace.c') diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl index 7f5f218015f..08ff908aa7a 100644 --- a/Documentation/DocBook/debugobjects.tmpl +++ b/Documentation/DocBook/debugobjects.tmpl @@ -106,7 +106,7 @@ number of errors are printk'ed including a full stack trace. - The statistics are available via debugfs/debug_objects/stats. + The statistics are available via /sys/kernel/debug/debug_objects/stats. They provide information about the number of warnings and the number of successful fixups along with information about the usage of the internal tracking objects and the state of the diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt index cf1f8126991..1c407778c8b 100644 --- a/Documentation/cdrom/packet-writing.txt +++ b/Documentation/cdrom/packet-writing.txt @@ -117,7 +117,7 @@ Using the pktcdvd debugfs interface To read pktcdvd device infos in human readable form, do: - # cat /debug/pktcdvd/pktcdvd[0-7]/info + # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info For a description of the debugfs interface look into the file: diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 4bc374a1434..07930564079 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -29,16 +29,16 @@ o debugfs entries fault-inject-debugfs kernel module provides some debugfs entries for runtime configuration of fault-injection capabilities. -- /debug/fail*/probability: +- /sys/kernel/debug/fail*/probability: likelihood of failure injection, in percent. Format: Note that one-failure-per-hundred is a very high error rate for some testcases. Consider setting probability=100 and configure - /debug/fail*/interval for such testcases. + /sys/kernel/debug/fail*/interval for such testcases. -- /debug/fail*/interval: +- /sys/kernel/debug/fail*/interval: specifies the interval between failures, for calls to should_fail() that pass all the other tests. @@ -46,18 +46,18 @@ configuration of fault-injection capabilities. Note that if you enable this, by setting interval>1, you will probably want to set probability=100. -- /debug/fail*/times: +- /sys/kernel/debug/fail*/times: specifies how many times failures may happen at most. A value of -1 means "no limit". -- /debug/fail*/space: +- /sys/kernel/debug/fail*/space: specifies an initial resource "budget", decremented by "size" on each call to should_fail(,size). Failure injection is suppressed until "space" reaches zero. -- /debug/fail*/verbose +- /sys/kernel/debug/fail*/verbose Format: { 0 | 1 | 2 } specifies the verbosity of the messages when failure is @@ -65,17 +65,17 @@ configuration of fault-injection capabilities. log line per failure; '2' will print a call trace too -- useful to debug the problems revealed by fault injection. -- /debug/fail*/task-filter: +- /sys/kernel/debug/fail*/task-filter: Format: { 'Y' | 'N' } A value of 'N' disables filtering by process (default). Any positive value limits failures to only processes indicated by /proc//make-it-fail==1. -- /debug/fail*/require-start: -- /debug/fail*/require-end: -- /debug/fail*/reject-start: -- /debug/fail*/reject-end: +- /sys/kernel/debug/fail*/require-start: +- /sys/kernel/debug/fail*/require-end: +- /sys/kernel/debug/fail*/reject-start: +- /sys/kernel/debug/fail*/reject-end: specifies the range of virtual addresses tested during stacktrace walking. Failure is injected only if some caller @@ -84,26 +84,26 @@ configuration of fault-injection capabilities. Default required range is [0,ULONG_MAX) (whole of virtual address space). Default rejected range is [0,0). -- /debug/fail*/stacktrace-depth: +- /sys/kernel/debug/fail*/stacktrace-depth: specifies the maximum stacktrace depth walked during search for a caller within [require-start,require-end) OR [reject-start,reject-end). -- /debug/fail_page_alloc/ignore-gfp-highmem: +- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem: Format: { 'Y' | 'N' } default is 'N', setting it to 'Y' won't inject failures into highmem/user allocations. -- /debug/failslab/ignore-gfp-wait: -- /debug/fail_page_alloc/ignore-gfp-wait: +- /sys/kernel/debug/failslab/ignore-gfp-wait: +- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait: Format: { 'Y' | 'N' } default is 'N', setting it to 'Y' will inject failures only into non-sleep allocations (GFP_ATOMIC allocations). -- /debug/fail_page_alloc/min-order: +- /sys/kernel/debug/fail_page_alloc/min-order: specifies the minimum page allocation order to be injected failures. @@ -166,13 +166,13 @@ o Inject slab allocation failures into module init/exit code #!/bin/bash FAILTYPE=failslab -echo Y > /debug/$FAILTYPE/task-filter -echo 10 > /debug/$FAILTYPE/probability -echo 100 > /debug/$FAILTYPE/interval -echo -1 > /debug/$FAILTYPE/times -echo 0 > /debug/$FAILTYPE/space -echo 2 > /debug/$FAILTYPE/verbose -echo 1 > /debug/$FAILTYPE/ignore-gfp-wait +echo Y > /sys/kernel/debug/$FAILTYPE/task-filter +echo 10 > /sys/kernel/debug/$FAILTYPE/probability +echo 100 > /sys/kernel/debug/$FAILTYPE/interval +echo -1 > /sys/kernel/debug/$FAILTYPE/times +echo 0 > /sys/kernel/debug/$FAILTYPE/space +echo 2 > /sys/kernel/debug/$FAILTYPE/verbose +echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait faulty_system() { @@ -217,20 +217,20 @@ then exit 1 fi -cat /sys/module/$module/sections/.text > /debug/$FAILTYPE/require-start -cat /sys/module/$module/sections/.data > /debug/$FAILTYPE/require-end +cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start +cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end -echo N > /debug/$FAILTYPE/task-filter -echo 10 > /debug/$FAILTYPE/probability -echo 100 > /debug/$FAILTYPE/interval -echo -1 > /debug/$FAILTYPE/times -echo 0 > /debug/$FAILTYPE/space -echo 2 > /debug/$FAILTYPE/verbose -echo 1 > /debug/$FAILTYPE/ignore-gfp-wait -echo 1 > /debug/$FAILTYPE/ignore-gfp-highmem -echo 10 > /debug/$FAILTYPE/stacktrace-depth +echo N > /sys/kernel/debug/$FAILTYPE/task-filter +echo 10 > /sys/kernel/debug/$FAILTYPE/probability +echo 100 > /sys/kernel/debug/$FAILTYPE/interval +echo -1 > /sys/kernel/debug/$FAILTYPE/times +echo 0 > /sys/kernel/debug/$FAILTYPE/space +echo 2 > /sys/kernel/debug/$FAILTYPE/verbose +echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait +echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem +echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth -trap "echo 0 > /debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT +trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT echo "Injecting errors into the module $module... (interrupt to stop)" sleep 1000000 diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 1e7a769a10f..053037a1fe6 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -507,9 +507,9 @@ http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) Appendix A: The kprobes debugfs interface With recent kernels (> 2.6.20) the list of registered kprobes is visible -under the /debug/kprobes/ directory (assuming debugfs is mounted at /debug). +under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug). -/debug/kprobes/list: Lists all registered probes on the system +/sys/kernel/debug/kprobes/list: Lists all registered probes on the system c015d71a k vfs_read+0x0 c011a316 j do_fork+0x0 @@ -525,7 +525,7 @@ virtual addresses that correspond to modules that've been unloaded), such probes are marked with [GONE]. If the probe is temporarily disabled, such probes are marked with [DISABLED]. -/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly. +/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly. Provides a knob to globally and forcibly turn registered kprobes ON or OFF. By default, all kprobes are enabled. By echoing "0" to this file, all diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 7bd27f0e288..a39b3c749de 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -7,7 +7,6 @@ Copyright 2008 Red Hat Inc. (dual licensed under the GPL v2) Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, John Kacur, and David Teigland. - Written for: 2.6.28-rc2 Introduction @@ -33,13 +32,26 @@ The File System Ftrace uses the debugfs file system to hold the control files as well as the files to display output. -To mount the debugfs system: +When debugfs is configured into the kernel (which selecting any ftrace +option will do) the directory /sys/kernel/debug will be created. To mount +this directory, you can add to your /etc/fstab file: + + debugfs /sys/kernel/debug debugfs defaults 0 0 + +Or you can mount it at run time with: + + mount -t debugfs nodev /sys/kernel/debug - # mkdir /debug - # mount -t debugfs nodev /debug +For quicker access to that directory you may want to make a soft link to +it: -( Note: it is more common to mount at /sys/kernel/debug, but for - simplicity this document will use /debug) + ln -s /sys/kernel/debug /debug + +Any selected ftrace option will also create a directory called tracing +within the debugfs. The rest of the document will assume that you are in +the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate +on the files within that directory and not distract from the content with +the extended "/sys/kernel/debug/tracing" path name. That's it! (assuming that you have ftrace configured into your kernel) @@ -389,18 +401,18 @@ trace_options The trace_options file is used to control what gets printed in the trace output. To see what is available, simply cat the file: - cat /debug/tracing/trace_options + cat trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj To disable one of the options, echo in the option prepended with "no". - echo noprint-parent > /debug/tracing/trace_options + echo noprint-parent > trace_options To enable an option, leave off the "no". - echo sym-offset > /debug/tracing/trace_options + echo sym-offset > trace_options Here are the available options: @@ -476,11 +488,11 @@ sched_switch This tracer simply records schedule switches. Here is an example of how to use it. - # echo sched_switch > /debug/tracing/current_tracer - # echo 1 > /debug/tracing/tracing_enabled + # echo sched_switch > current_tracer + # echo 1 > tracing_enabled # sleep 1 - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/trace + # echo 0 > tracing_enabled + # cat trace # tracer: sched_switch # @@ -583,13 +595,13 @@ new trace is saved. To reset the maximum, echo 0 into tracing_max_latency. Here is an example: - # echo irqsoff > /debug/tracing/current_tracer - # echo 0 > /debug/tracing/tracing_max_latency - # echo 1 > /debug/tracing/tracing_enabled + # echo irqsoff > current_tracer + # echo 0 > tracing_max_latency + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/latency_trace + # echo 0 > tracing_enabled + # cat latency_trace # tracer: irqsoff # irqsoff latency trace v1.1.5 on 2.6.26 @@ -690,13 +702,13 @@ Like the irqsoff tracer, it records the maximum latency for which preemption was disabled. The control of preemptoff tracer is much like the irqsoff tracer. - # echo preemptoff > /debug/tracing/current_tracer - # echo 0 > /debug/tracing/tracing_max_latency - # echo 1 > /debug/tracing/tracing_enabled + # echo preemptoff > current_tracer + # echo 0 > tracing_max_latency + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/latency_trace + # echo 0 > tracing_enabled + # cat latency_trace # tracer: preemptoff # preemptoff latency trace v1.1.5 on 2.6.26-rc8 @@ -837,13 +849,13 @@ tracer. Again, using this trace is much like the irqsoff and preemptoff tracers. - # echo preemptirqsoff > /debug/tracing/current_tracer - # echo 0 > /debug/tracing/tracing_max_latency - # echo 1 > /debug/tracing/tracing_enabled + # echo preemptirqsoff > current_tracer + # echo 0 > tracing_max_latency + # echo 1 > tracing_enabled # ls -ltr [...] - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/latency_trace + # echo 0 > tracing_enabled + # cat latency_trace # tracer: preemptirqsoff # preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 @@ -999,12 +1011,12 @@ slightly differently than we did with the previous tracers. Instead of performing an 'ls', we will run 'sleep 1' under 'chrt' which changes the priority of the task. - # echo wakeup > /debug/tracing/current_tracer - # echo 0 > /debug/tracing/tracing_max_latency - # echo 1 > /debug/tracing/tracing_enabled + # echo wakeup > current_tracer + # echo 0 > tracing_max_latency + # echo 1 > tracing_enabled # chrt -f 5 sleep 1 - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/latency_trace + # echo 0 > tracing_enabled + # cat latency_trace # tracer: wakeup # wakeup latency trace v1.1.5 on 2.6.26-rc8 @@ -1114,11 +1126,11 @@ can be done from the debug file system. Make sure the ftrace_enabled is set; otherwise this tracer is a nop. # sysctl kernel.ftrace_enabled=1 - # echo function > /debug/tracing/current_tracer - # echo 1 > /debug/tracing/tracing_enabled + # echo function > current_tracer + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/trace + # echo 0 > tracing_enabled + # cat trace # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION @@ -1155,7 +1167,7 @@ int trace_fd; [...] int main(int argc, char *argv[]) { [...] - trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY); + trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); [...] if (condition_hit()) { write(trace_fd, "0", 1); @@ -1163,26 +1175,20 @@ int main(int argc, char *argv[]) { [...] } -Note: Here we hard coded the path name. The debugfs mount is not -guaranteed to be at /debug (and is more commonly at -/sys/kernel/debug). For simple one time traces, the above is -sufficent. For anything else, a search through /proc/mounts may -be needed to find where the debugfs file-system is mounted. - Single thread tracing --------------------- -By writing into /debug/tracing/set_ftrace_pid you can trace a +By writing into set_ftrace_pid you can trace a single thread. For example: -# cat /debug/tracing/set_ftrace_pid +# cat set_ftrace_pid no pid -# echo 3111 > /debug/tracing/set_ftrace_pid -# cat /debug/tracing/set_ftrace_pid +# echo 3111 > set_ftrace_pid +# cat set_ftrace_pid 3111 -# echo function > /debug/tracing/current_tracer -# cat /debug/tracing/trace | head +# echo function > current_tracer +# cat trace | head # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION @@ -1193,8 +1199,8 @@ no pid yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll -# echo -1 > /debug/tracing/set_ftrace_pid -# cat /debug/tracing/trace |head +# echo -1 > set_ftrace_pid +# cat trace |head # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION @@ -1216,6 +1222,51 @@ something like this simple program: #include #include +#define _STR(x) #x +#define STR(x) _STR(x) +#define MAX_PATH 256 + +const char *find_debugfs(void) +{ + static char debugfs[MAX_PATH+1]; + static int debugfs_found; + char type[100]; + FILE *fp; + + if (debugfs_found) + return debugfs; + + if ((fp = fopen("/proc/mounts","r")) == NULL) { + perror("/proc/mounts"); + return NULL; + } + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + debugfs, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) { + fprintf(stderr, "debugfs not mounted"); + return NULL; + } + + debugfs_found = 1; + + return debugfs; +} + +const char *tracing_file(const char *file_name) +{ + static char trace_file[MAX_PATH+1]; + snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name); + return trace_file; +} + int main (int argc, char **argv) { if (argc < 1) @@ -1226,12 +1277,12 @@ int main (int argc, char **argv) char line[64]; int s; - ffd = open("/debug/tracing/current_tracer", O_WRONLY); + ffd = open(tracing_file("current_tracer"), O_WRONLY); if (ffd < 0) exit(-1); write(ffd, "nop", 3); - fd = open("/debug/tracing/set_ftrace_pid", O_WRONLY); + fd = open(tracing_file("set_ftrace_pid"), O_WRONLY); s = sprintf(line, "%d\n", getpid()); write(fd, line, s); @@ -1383,22 +1434,22 @@ want, depending on your needs. tracing_cpu_mask file) or you might sometimes see unordered function calls while cpu tracing switch. - hide: echo nofuncgraph-cpu > /debug/tracing/trace_options - show: echo funcgraph-cpu > /debug/tracing/trace_options + hide: echo nofuncgraph-cpu > trace_options + show: echo funcgraph-cpu > trace_options - The duration (function's time of execution) is displayed on the closing bracket line of a function or on the same line than the current function in case of a leaf one. It is default enabled. - hide: echo nofuncgraph-duration > /debug/tracing/trace_options - show: echo funcgraph-duration > /debug/tracing/trace_options + hide: echo nofuncgraph-duration > trace_options + show: echo funcgraph-duration > trace_options - The overhead field precedes the duration field in case of reached duration thresholds. - hide: echo nofuncgraph-overhead > /debug/tracing/trace_options - show: echo funcgraph-overhead > /debug/tracing/trace_options + hide: echo nofuncgraph-overhead > trace_options + show: echo funcgraph-overhead > trace_options depends on: funcgraph-duration ie: @@ -1427,8 +1478,8 @@ want, depending on your needs. - The task/pid field displays the thread cmdline and pid which executed the function. It is default disabled. - hide: echo nofuncgraph-proc > /debug/tracing/trace_options - show: echo funcgraph-proc > /debug/tracing/trace_options + hide: echo nofuncgraph-proc > trace_options + show: echo funcgraph-proc > trace_options ie: @@ -1451,8 +1502,8 @@ want, depending on your needs. system clock since it started. A snapshot of this time is given on each entry/exit of functions - hide: echo nofuncgraph-abstime > /debug/tracing/trace_options - show: echo funcgraph-abstime > /debug/tracing/trace_options + hide: echo nofuncgraph-abstime > trace_options + show: echo funcgraph-abstime > trace_options ie: @@ -1549,7 +1600,7 @@ listed in: available_filter_functions - # cat /debug/tracing/available_filter_functions + # cat available_filter_functions put_prev_task_idle kmem_cache_create pick_next_task_rt @@ -1561,12 +1612,12 @@ mutex_lock If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ - > /debug/tracing/set_ftrace_filter - # echo ftrace > /debug/tracing/current_tracer - # echo 1 > /debug/tracing/tracing_enabled + > set_ftrace_filter + # echo ftrace > current_tracer + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/trace + # echo 0 > tracing_enabled + # cat trace # tracer: ftrace # # TASK-PID CPU# TIMESTAMP FUNCTION @@ -1577,7 +1628,7 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: To see which functions are being traced, you can cat the file: - # cat /debug/tracing/set_ftrace_filter + # cat set_ftrace_filter hrtimer_interrupt sys_nanosleep @@ -1597,7 +1648,7 @@ Note: It is better to use quotes to enclose the wild cards, otherwise the shell may expand the parameters into names of files in the local directory. - # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter + # echo 'hrtimer_*' > set_ftrace_filter Produces: @@ -1618,7 +1669,7 @@ Produces: Notice that we lost the sys_nanosleep. - # cat /debug/tracing/set_ftrace_filter + # cat set_ftrace_filter hrtimer_run_queues hrtimer_run_pending hrtimer_init @@ -1644,17 +1695,17 @@ To append to the filters, use '>>' To clear out a filter so that all functions will be recorded again: - # echo > /debug/tracing/set_ftrace_filter - # cat /debug/tracing/set_ftrace_filter + # echo > set_ftrace_filter + # cat set_ftrace_filter # Again, now we want to append. - # echo sys_nanosleep > /debug/tracing/set_ftrace_filter - # cat /debug/tracing/set_ftrace_filter + # echo sys_nanosleep > set_ftrace_filter + # cat set_ftrace_filter sys_nanosleep - # echo 'hrtimer_*' >> /debug/tracing/set_ftrace_filter - # cat /debug/tracing/set_ftrace_filter + # echo 'hrtimer_*' >> set_ftrace_filter + # cat set_ftrace_filter hrtimer_run_queues hrtimer_run_pending hrtimer_init @@ -1677,7 +1728,7 @@ hrtimer_init_sleeper The set_ftrace_notrace prevents those functions from being traced. - # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace + # echo '*preempt*' '*lock*' > set_ftrace_notrace Produces: @@ -1767,13 +1818,13 @@ the effect on the tracing is different. Every read from trace_pipe is consumed. This means that subsequent reads will be different. The trace is live. - # echo function > /debug/tracing/current_tracer - # cat /debug/tracing/trace_pipe > /tmp/trace.out & + # echo function > current_tracer + # cat trace_pipe > /tmp/trace.out & [1] 4153 - # echo 1 > /debug/tracing/tracing_enabled + # echo 1 > tracing_enabled # usleep 1 - # echo 0 > /debug/tracing/tracing_enabled - # cat /debug/tracing/trace + # echo 0 > tracing_enabled + # cat trace # tracer: function # # TASK-PID CPU# TIMESTAMP FUNCTION @@ -1809,7 +1860,7 @@ number listed is the number of entries that can be recorded per CPU. To know the full size, multiply the number of possible CPUS with the number of entries. - # cat /debug/tracing/buffer_size_kb + # cat buffer_size_kb 1408 (units kilobytes) Note, to modify this, you must have tracing completely disabled. @@ -1817,18 +1868,18 @@ To do that, echo "nop" into the current_tracer. If the current_tracer is not set to "nop", an EINVAL error will be returned. - # echo nop > /debug/tracing/current_tracer - # echo 10000 > /debug/tracing/buffer_size_kb - # cat /debug/tracing/buffer_size_kb + # echo nop > current_tracer + # echo 10000 > buffer_size_kb + # cat buffer_size_kb 10000 (units kilobytes) The number of pages which will be allocated is limited to a percentage of available memory. Allocating too much will produce an error. - # echo 1000000000000 > /debug/tracing/buffer_size_kb + # echo 1000000000000 > buffer_size_kb -bash: echo: write error: Cannot allocate memory - # cat /debug/tracing/buffer_size_kb + # cat buffer_size_kb 85 ----------- diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt index 5731c67abc5..162effbfbde 100644 --- a/Documentation/trace/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.txt @@ -32,41 +32,41 @@ is no way to automatically detect if you are losing events due to CPUs racing. Usage Quick Reference --------------------- -$ mount -t debugfs debugfs /debug -$ echo mmiotrace > /debug/tracing/current_tracer -$ cat /debug/tracing/trace_pipe > mydump.txt & +$ mount -t debugfs debugfs /sys/kernel/debug +$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer +$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & Start X or whatever. -$ echo "X is up" > /debug/tracing/trace_marker -$ echo nop > /debug/tracing/current_tracer +$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker +$ echo nop > /sys/kernel/debug/tracing/current_tracer Check for lost events. Usage ----- -Make sure debugfs is mounted to /debug. If not, (requires root privileges) -$ mount -t debugfs debugfs /debug +Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges) +$ mount -t debugfs debugfs /sys/kernel/debug Check that the driver you are about to trace is not loaded. Activate mmiotrace (requires root privileges): -$ echo mmiotrace > /debug/tracing/current_tracer +$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer Start storing the trace: -$ cat /debug/tracing/trace_pipe > mydump.txt & +$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt & The 'cat' process should stay running (sleeping) in the background. Load the driver you want to trace and use it. Mmiotrace will only catch MMIO accesses to areas that are ioremapped while mmiotrace is active. During tracing you can place comments (markers) into the trace by -$ echo "X is up" > /debug/tracing/trace_marker +$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker This makes it easier to see which part of the (huge) trace corresponds to which action. It is recommended to place descriptive markers about what you do. Shut down mmiotrace (requires root privileges): -$ echo nop > /debug/tracing/current_tracer +$ echo nop > /sys/kernel/debug/tracing/current_tracer The 'cat' process exits. If it does not, kill it by issuing 'fg' command and pressing ctrl+c. @@ -78,10 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If events were lost, the trace is incomplete. You should enlarge the buffers and try again. Buffers are enlarged by first seeing how large the current buffers are: -$ cat /debug/tracing/buffer_size_kb +$ cat /sys/kernel/debug/tracing/buffer_size_kb gives you a number. Approximately double this number and write it back, for instance: -$ echo 128000 > /debug/tracing/buffer_size_kb +$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb Then start again from the top. If you are doing a trace for a driver project, e.g. Nouveau, you should also diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 37e0f81cada..83650e00632 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -430,7 +430,7 @@ static void pkt_sysfs_cleanup(void) /******************************************************************** entries in debugfs - /debugfs/pktcdvd[0-7]/ + /sys/kernel/debug/pktcdvd[0-7]/ info *******************************************************************/ diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index c77c6c6d9d2..6ce0e2667a8 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -105,7 +105,7 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count, ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO, root, tmp, &drm_debugfs_fops); if (!ent) { - DRM_ERROR("Cannot create /debugfs/dri/%s/%s\n", + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n", name, files[i].name); drm_free(tmp, sizeof(struct drm_info_node), _DRM_DRIVER); @@ -133,9 +133,9 @@ EXPORT_SYMBOL(drm_debugfs_create_files); * \param minor device minor number * \param root DRI debugfs dir entry. * - * Create the DRI debugfs root entry "/debugfs/dri", the device debugfs root entry - * "/debugfs/dri/%minor%/", and each entry in debugfs_list as - * "/debugfs/dri/%minor%/%name%". + * Create the DRI debugfs root entry "/sys/kernel/debug/dri", the device debugfs root entry + * "/sys/kernel/debug/dri/%minor%/", and each entry in debugfs_list as + * "/sys/kernel/debug/dri/%minor%/%name%". */ int drm_debugfs_init(struct drm_minor *minor, int minor_id, struct dentry *root) @@ -148,7 +148,7 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id, sprintf(name, "%d", minor_id); minor->debugfs_root = debugfs_create_dir(name, root); if (!minor->debugfs_root) { - DRM_ERROR("Cannot create /debugfs/dri/%s\n", name); + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s\n", name); return -1; } @@ -165,7 +165,7 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id, ret = dev->driver->debugfs_init(minor); if (ret) { DRM_ERROR("DRM: Driver failed to initialize " - "/debugfs/dri.\n"); + "/sys/kernel/debug/dri.\n"); return ret; } } diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 019b7c57823..1bf7efd8d33 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -339,7 +339,7 @@ static int __init drm_core_init(void) drm_debugfs_root = debugfs_create_dir("dri", NULL); if (!drm_debugfs_root) { - DRM_ERROR("Cannot create /debugfs/dri\n"); + DRM_ERROR("Cannot create /sys/kernel/debug/dri\n"); ret = -1; goto err_p3; } diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 89050684fe0..387a8de1bc7 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -343,7 +343,7 @@ static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int t #if defined(CONFIG_DEBUG_FS) ret = drm_debugfs_init(new_minor, minor_id, drm_debugfs_root); if (ret) { - DRM_ERROR("DRM: Failed to initialize /debugfs/dri.\n"); + DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); goto err_g2; } #endif diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 01f282cd098..3b6383168c6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2206,7 +2206,7 @@ config SKGE_DEBUG depends on SKGE && DEBUG_FS help This option adds the ability to dump driver state for debugging. - The file debugfs/skge/ethX displays the state of the internal + The file /sys/kernel/debug/skge/ethX displays the state of the internal transmit and receive rings. If unsure, say N. @@ -2232,7 +2232,7 @@ config SKY2_DEBUG depends on SKY2 && DEBUG_FS help This option adds the ability to dump driver state for debugging. - The file debugfs/sky2/ethX displays the state of the internal + The file /sys/kernel/debug/sky2/ethX displays the state of the internal transmit and receive rings. If unsure, say N. diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h index 1fe5da4cf0a..60330f313f2 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/net/wimax/i2400m/i2400m.h @@ -432,7 +432,7 @@ struct i2400m { unsigned ready:1; /* all probing steps done */ unsigned rx_reorder:1; /* RX reorder is enabled */ u8 trace_msg_from_user; /* echo rx msgs to 'trace' pipe */ - /* typed u8 so debugfs/u8 can tweak */ + /* typed u8 so /sys/kernel/debug/u8 can tweak */ enum i2400m_system_state state; wait_queue_head_t state_wq; /* Woken up when on state updates */ diff --git a/drivers/net/wireless/ath/ath5k/Kconfig b/drivers/net/wireless/ath/ath5k/Kconfig index 509b6f94f73..daf0c83527d 100644 --- a/drivers/net/wireless/ath/ath5k/Kconfig +++ b/drivers/net/wireless/ath/ath5k/Kconfig @@ -28,11 +28,10 @@ config ATH5K_DEBUG Say Y, if and you will get debug options for ath5k. To use this, you need to mount debugfs: - mkdir /debug/ - mount -t debugfs debug /debug/ + mount -t debugfs debug /sys/kernel/debug You will get access to files under: - /debug/ath5k/phy0/ + /sys/kernel/debug/ath5k/phy0/ To enable debug, pass the debug level to the debug module parameter. For example: diff --git a/drivers/net/wireless/libertas/README b/drivers/net/wireless/libertas/README index d860fc37575..ab6a2d518af 100644 --- a/drivers/net/wireless/libertas/README +++ b/drivers/net/wireless/libertas/README @@ -72,7 +72,7 @@ rdrf location that is to be read. This parameter must be specified in hexadecimal (its possible to preceed preceding the number with a "0x"). - Path: /debugfs/libertas_wireless/ethX/registers/ + Path: /sys/kernel/debug/libertas_wireless/ethX/registers/ Usage: echo "0xa123" > rdmac ; cat rdmac @@ -95,7 +95,7 @@ wrrf sleepparams This command is used to set the sleepclock configurations - Path: /debugfs/libertas_wireless/ethX/ + Path: /sys/kernel/debug/libertas_wireless/ethX/ Usage: cat sleepparams: reads the current sleepclock configuration @@ -115,7 +115,7 @@ subscribed_events The subscribed_events directory contains the interface for the subscribed events API. - Path: /debugfs/libertas_wireless/ethX/subscribed_events/ + Path: /sys/kernel/debug/libertas_wireless/ethX/subscribed_events/ Each event is represented by a filename. Each filename consists of the following three fields: @@ -165,7 +165,7 @@ subscribed_events extscan This command is used to do a specific scan. - Path: /debugfs/libertas_wireless/ethX/ + Path: /sys/kernel/debug/libertas_wireless/ethX/ Usage: echo "SSID" > extscan @@ -179,7 +179,7 @@ getscantable Display the current contents of the driver scan table (ie. get the scan results). - Path: /debugfs/libertas_wireless/ethX/ + Path: /sys/kernel/debug/libertas_wireless/ethX/ Usage: cat getscantable @@ -188,7 +188,7 @@ setuserscan Initiate a customized scan and retrieve the results - Path: /debugfs/libertas_wireless/ethX/ + Path: /sys/kernel/debug/libertas_wireless/ethX/ Usage: echo "[ARGS]" > setuserscan diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 2b02b1fb39a..8d0f0de76b6 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -53,8 +53,7 @@ * debugfs interface * * To access this interface the user should: - * # mkdir /debug - * # mount -t debugfs none /debug + * # mount -t debugfs none /sys/kernel/debug * * The lpfc debugfs directory hierarchy is: * lpfc/lpfcX/vportY diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 883cd44ff76..99b7aada28d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -406,7 +406,7 @@ static inline char *pack_hex_byte(char *buf, u8 byte) * * Use tracing_on/tracing_off when you want to quickly turn on or off * tracing. It simply enables or disables the recording of the trace events. - * This also corresponds to the user space debugfs/tracing/tracing_on + * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on * file, which gives a means for the kernel and userspace to interact. * Place a tracing_off() in the kernel where you want tracing to end. * From user space, examine the trace, and then echo 1 > tracing_on diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 14df7e635d4..b9dc4ca0246 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -198,7 +198,7 @@ static inline void tracepoint_synchronize_unregister(void) * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in - * * /debug/tracing/events/<*>/format. + * * /sys/kernel/debug/tracing/events/<*>/format. * * * * The declared 'local variable' is called '__entry' * * @@ -258,7 +258,7 @@ static inline void tracepoint_synchronize_unregister(void) * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in - * /debug/tracing/events/. + * /sys/kernel/debug/tracing/events/. */ #define TRACE_EVENT(name, proto, args, struct, assign, print) \ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4a13e5a01ce..61071fecc82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -147,7 +147,7 @@ config IRQSOFF_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /debugfs/tracing/tracing_max_latency + echo 0 > /sys/kernel/debug/tracing/tracing_max_latency (Note that kernel size and overhead increases with this option enabled. This option and the preempt-off timing option can be @@ -168,7 +168,7 @@ config PREEMPT_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /debugfs/tracing/tracing_max_latency + echo 0 > /sys/kernel/debug/tracing/tracing_max_latency (Note that kernel size and overhead increases with this option enabled. This option and the irqs-off timing option can be @@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: - /debugfs/tracing/profile_annotated_branch + /sys/kernel/debug/tracing/profile_annotated_branch Note: this will add a significant overhead, only turn this on if you need to profile the system's use of these macros. @@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES taken in the kernel is recorded whether it hit or miss. The results will be displayed in: - /debugfs/tracing/profile_branch + /sys/kernel/debug/tracing/profile_branch This option also enables the likely/unlikely profiler. @@ -323,7 +323,7 @@ config STACK_TRACER select KALLSYMS help This special tracer records the maximum stack footprint of the - kernel and displays it in debugfs/tracing/stack_trace. + kernel and displays it in /sys/kernel/debug/tracing/stack_trace. This tracer works by hooking into every function call that the kernel executes, and keeping a maximum stack depth value and diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8acd9b81a5d..c1878bfb2e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock = /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, - * for later retrieval via /debugfs/tracing/latency_trace) + * for later retrieval via /sys/kernel/debug/tracing/latency_trace) */ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) @@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = { static const char readme_msg[] = "tracing mini-HOWTO:\n\n" - "# mkdir /debug\n" - "# mount -t debugfs nodev /debug\n\n" - "# cat /debug/tracing/available_tracers\n" + "# mount -t debugfs nodev /sys/kernel/debug\n\n" + "# cat /sys/kernel/debug/tracing/available_tracers\n" "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" - "# cat /debug/tracing/current_tracer\n" + "# cat /sys/kernel/debug/tracing/current_tracer\n" "nop\n" - "# echo sched_switch > /debug/tracing/current_tracer\n" - "# cat /debug/tracing/current_tracer\n" + "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" + "# cat /sys/kernel/debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/trace_options\n" + "# cat /sys/kernel/debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/trace_options\n" - "# echo 1 > /debug/tracing/tracing_enabled\n" - "# cat /debug/tracing/trace > /tmp/trace.txt\n" - "# echo 0 > /debug/tracing/tracing_enabled\n" + "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" + "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" + "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" ; static ssize_t diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 902f9a99262..db40fa04cd5 100644 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -12,10 +12,9 @@ calls. Only the functions's names and the the call time are provided. Usage: Be sure that you have CONFIG_FUNCTION_TRACER - # mkdir /debugfs - # mount -t debug debug /debug - # echo function > /debug/tracing/current_tracer - $ cat /debug/tracing/trace_pipe > ~/raw_trace_func + # mount -t debugfs nodev /sys/kernel/debug + # echo function > /sys/kernel/debug/tracing/current_tracer + $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow. Break the pipe (Ctrl + Z) $ scripts/draw_functrace.py < raw_trace_func > draw_functrace -- cgit v1.2.3-70-g09d2