From b77e38aa240c3bd9c55c98b9f7c81541e042eae5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Feb 2009 10:21:36 -0500 Subject: tracing: add event trace infrastructure This patch creates the event tracing infrastructure of ftrace. It will create the files: /debug/tracing/available_events /debug/tracing/set_event The available_events will list the trace points that have been registered with the event tracer. set_events will allow the user to enable or disable an event hook. example: # echo sched_wakeup > /debug/tracing/set_event Will enable the sched_wakeup event (if it is registered). # echo "!sched_wakeup" >> /debug/tracing/set_event Will disable the sched_wakeup event (and only that event). # echo > /debug/tracing/set_event Will disable all events (notice the '>') # cat /debug/tracing/available_events > /debug/tracing/set_event Will enable all registered event hooks. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 280 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 kernel/trace/trace_events.c (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c new file mode 100644 index 00000000000..05bc80ec8d2 --- /dev/null +++ b/kernel/trace/trace_events.c @@ -0,0 +1,280 @@ +/* + * event tracer + * + * Copyright (C) 2008 Red Hat Inc, Steven Rostedt + * + */ + +#include +#include +#include +#include + +#include "trace_events.h" + +void event_trace_printk(unsigned long ip, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + tracing_record_cmdline(current); + trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + va_end(ap); +} + +static void ftrace_clear_events(void) +{ + struct ftrace_event_call *call = (void *)__start_ftrace_events; + + + while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { + + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + } + call++; + } +} + +static int ftrace_set_clr_event(char *buf, int set) +{ + struct ftrace_event_call *call = (void *)__start_ftrace_events; + + + while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { + + if (strcmp(buf, call->name) != 0) { + call++; + continue; + } + + if (set) { + /* Already set? */ + if (call->enabled) + return 0; + call->enabled = 1; + call->regfunc(); + } else { + /* Already cleared? */ + if (!call->enabled) + return 0; + call->enabled = 0; + call->unregfunc(); + } + return 0; + } + return -EINVAL; +} + +/* 128 should be much more than enough */ +#define EVENT_BUF_SIZE 127 + +static ssize_t +ftrace_event_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + size_t read = 0; + int i, set = 1; + ssize_t ret; + char *buf; + char ch; + + if (!cnt || cnt < 0) + return 0; + + ret = get_user(ch, ubuf++); + if (ret) + return ret; + read++; + cnt--; + + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + return ret; + read++; + cnt--; + } + + /* Only white space found? */ + if (isspace(ch)) { + file->f_pos += read; + ret = read; + return ret; + } + + buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (cnt > EVENT_BUF_SIZE) + cnt = EVENT_BUF_SIZE; + + i = 0; + while (cnt && !isspace(ch)) { + if (!i && ch == '!') + set = 0; + else + buf[i++] = ch; + + ret = get_user(ch, ubuf++); + if (ret) + goto out_free; + read++; + cnt--; + } + buf[i] = 0; + + file->f_pos += read; + + ret = ftrace_set_clr_event(buf, set); + if (ret) + goto out_free; + + ret = read; + + out_free: + kfree(buf); + + return ret; +} + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_event_call *call = m->private; + struct ftrace_event_call *next = call; + + (*pos)++; + + if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + return NULL; + + m->private = ++next; + + return call; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + return t_next(m, NULL, pos); +} + +static void * +s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_event_call *call = m->private; + struct ftrace_event_call *next; + + (*pos)++; + + retry: + if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + return NULL; + + if (!call->enabled) { + call++; + goto retry; + } + + next = call; + m->private = ++next; + + return call; +} + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + return s_next(m, NULL, pos); +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_event_call *call = v; + + seq_printf(m, "%s\n", call->name); + + return 0; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int +ftrace_event_seq_open(struct inode *inode, struct file *file) +{ + int ret; + const struct seq_operations *seq_ops; + + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) + ftrace_clear_events(); + + seq_ops = inode->i_private; + ret = seq_open(file, seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + + m->private = __start_ftrace_events; + } + return ret; +} + +static const struct seq_operations show_event_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show, + .stop = t_stop, +}; + +static const struct seq_operations show_set_event_seq_ops = { + .start = s_start, + .next = s_next, + .show = t_show, + .stop = t_stop, +}; + +static const struct file_operations ftrace_avail_fops = { + .open = ftrace_event_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations ftrace_set_event_fops = { + .open = ftrace_event_seq_open, + .read = seq_read, + .write = ftrace_event_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static __init int event_trace_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + entry = debugfs_create_file("available_events", 0444, d_tracer, + (void *)&show_event_seq_ops, + &ftrace_avail_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'available_events' entry\n"); + + entry = debugfs_create_file("set_event", 0644, d_tracer, + (void *)&show_set_event_seq_ops, + &ftrace_set_event_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_event' entry\n"); + + return 0; +} +fs_initcall(event_trace_init); -- cgit v1.2.3-70-g09d2 From 1473e4417c79f12d91ef91a469699bfa911f510f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Feb 2009 14:15:08 -0500 Subject: tracing: make event directory structure This patch adds the directory /debug/tracing/events/ that will contain all the registered trace points. # ls /debug/tracing/events/ sched_kthread_stop sched_process_fork sched_switch sched_kthread_stop_ret sched_process_free sched_wait_task sched_migrate_task sched_process_wait sched_wakeup sched_process_exit sched_signal_send sched_wakeup_new # ls /debug/tracing/events/sched_switch/ enable # cat /debug/tracing/events/sched_switch/enable 1 # cat /debug/tracing/set_event sched_switch Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 137 ++++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace_events.h | 7 ++- 2 files changed, 137 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 05bc80ec8d2..3bcb9df9334 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -12,6 +12,11 @@ #include "trace_events.h" +#define events_for_each(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + void event_trace_printk(unsigned long ip, const char *fmt, ...) { va_list ap; @@ -39,15 +44,16 @@ static void ftrace_clear_events(void) static int ftrace_set_clr_event(char *buf, int set) { - struct ftrace_event_call *call = (void *)__start_ftrace_events; + struct ftrace_event_call *call = __start_ftrace_events; - while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { + events_for_each(call) { - if (strcmp(buf, call->name) != 0) { - call++; + if (!call->name) + continue; + + if (strcmp(buf, call->name) != 0) continue; - } if (set) { /* Already set? */ @@ -223,6 +229,67 @@ ftrace_event_seq_open(struct inode *inode, struct file *file) return ret; } +static ssize_t +event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char *buf; + + if (call->enabled) + buf = "1\n"; + else + buf = "0\n"; + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); +} + +static ssize_t +event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64]; + unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + switch (val) { + case 0: + if (!call->enabled) + break; + + call->enabled = 0; + call->unregfunc(); + break; + case 1: + if (call->enabled) + break; + + call->enabled = 1; + call->regfunc(); + break; + + default: + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -252,10 +319,59 @@ static const struct file_operations ftrace_set_event_fops = { .release = seq_release, }; +static const struct file_operations ftrace_enable_fops = { + .open = tracing_open_generic, + .read = event_enable_read, + .write = event_enable_write, +}; + +static struct dentry *event_trace_events_dir(void) +{ + static struct dentry *d_tracer; + static struct dentry *d_events; + + if (d_events) + return d_events; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return NULL; + + d_events = debugfs_create_dir("events", d_tracer); + if (!d_events) + pr_warning("Could not create debugfs " + "'events' directory\n"); + + return d_events; +} + +static int +event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) +{ + struct dentry *entry; + + call->dir = debugfs_create_dir(call->name, d_events); + if (!call->dir) { + pr_warning("Could not create debugfs " + "'%s' directory\n", call->name); + return -1; + } + + entry = debugfs_create_file("enable", 0644, call->dir, call, + &ftrace_enable_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/enable' entry\n", call->name); + + return 0; +} + static __init int event_trace_init(void) { + struct ftrace_event_call *call = __start_ftrace_events; struct dentry *d_tracer; struct dentry *entry; + struct dentry *d_events; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -275,6 +391,17 @@ static __init int event_trace_init(void) pr_warning("Could not create debugfs " "'set_event' entry\n"); + d_events = event_trace_events_dir(); + if (!d_events) + return 0; + + events_for_each(call) { + /* The linker may leave blanks */ + if (!call->name) + continue; + event_create_dir(call, d_events); + } + return 0; } fs_initcall(event_trace_init); diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h index 39342f86db2..cb8455b3ac9 100644 --- a/kernel/trace/trace_events.h +++ b/kernel/trace/trace_events.h @@ -1,11 +1,13 @@ #ifndef _LINUX_KERNEL_TRACE_EVENTS_H #define _LINUX_KERNEL_TRACE_EVENTS_H +#include #include #include "trace.h" struct ftrace_event_call { char *name; + struct dentry *dir; int enabled; int (*regfunc)(void); void (*unregfunc)(void); @@ -39,6 +41,7 @@ static void ftrace_unreg_event_##call(void) \ } \ \ static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .regfunc = ftrace_reg_event_##call, \ @@ -46,7 +49,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ } void event_trace_printk(unsigned long ip, const char *fmt, ...); -extern unsigned long __start_ftrace_events[]; -extern unsigned long __stop_ftrace_events[]; +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; #endif /* _LINUX_KERNEL_TRACE_EVENTS_H */ -- cgit v1.2.3-70-g09d2 From 6ecc2d1ca39177edb6fbdb7412948b0e9f409d02 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 21:33:02 -0500 Subject: tracing: add subsystem level to trace events If a trace point header defines TRACE_SYSTEM, then it will add the following trace points into that event system. If include/trace/irq_event_types.h has: #define TRACE_SYSTEM irq at the top and #undef TRACE_SYSTEM at the bottom, then a directory "irq" will be created in the /debug/tracing/events directory. Inside that directory will contain the two trace points that are defined in include/trace/irq_event_types.h. Only adding the above to irq and not to sched, we get: # ls /debug/tracing/events/ irq sched_process_exit sched_signal_send sched_wakeup_new sched_kthread_stop sched_process_fork sched_switch sched_kthread_stop_ret sched_process_free sched_wait_task sched_migrate_task sched_process_wait sched_wakeup # ls /debug/tracing/events/irq irq_handler_entry irq_handler_exit If we add #define TRACE_SYSTEM sched to the trace/sched_event_types.h then the rest of the trace events will be put in a sched directory within the events directory. I've been playing with this idea of the subsystem for a while, but recently Tom Zanussi posted some patches to lkml that included this method. Tom's approach was clean and got me to finally put some effort to clean up the event trace points. Thanks to Tom Zanussi for demonstrating how nice the subsystem method is. Signed-off-by: Steven Rostedt --- kernel/trace/events.c | 4 ++++ kernel/trace/trace_events.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events.h | 2 ++ 3 files changed, 54 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 46e27ad2487..4e4e45860c5 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -2,6 +2,10 @@ * This is the place to register all trace points as events. */ +/* someday this needs to go in a generic header */ +#define __STR(x) #x +#define STR(x) __STR(x) + #include #include "trace_events.h" diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3bcb9df9334..19332200c45 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -345,11 +345,59 @@ static struct dentry *event_trace_events_dir(void) return d_events; } +struct event_subsystem { + struct list_head list; + const char *name; + struct dentry *entry; +}; + +static LIST_HEAD(event_subsystems); + +static struct dentry * +event_subsystem_dir(const char *name, struct dentry *d_events) +{ + struct event_subsystem *system; + + /* First see if we did not already create this dir */ + list_for_each_entry(system, &event_subsystems, list) { + if (strcmp(system->name, name) == 0) + return system->entry; + } + + /* need to create new entry */ + system = kmalloc(sizeof(*system), GFP_KERNEL); + if (!system) { + pr_warning("No memory to create event subsystem %s\n", + name); + return d_events; + } + + system->entry = debugfs_create_dir(name, d_events); + if (!system->entry) { + pr_warning("Could not create event subsystem %s\n", + name); + kfree(system); + return d_events; + } + + system->name = name; + list_add(&system->list, &event_subsystems); + + return system->entry; +} + static int event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) { struct dentry *entry; + /* + * If the trace point header did not define TRACE_SYSTEM + * then the system would be called "TRACE_SYSTEM". + */ + if (strcmp(call->system, "TRACE_SYSTEM") != 0) + d_events = event_subsystem_dir(call->system, d_events); + call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h index deb95e5006c..b015d7b1987 100644 --- a/kernel/trace/trace_events.h +++ b/kernel/trace/trace_events.h @@ -7,6 +7,7 @@ struct ftrace_event_call { char *name; + char *system; struct dentry *dir; int enabled; int (*regfunc)(void); @@ -44,6 +45,7 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ + .system = STR(TRACE_SYSTEM), \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ } -- cgit v1.2.3-70-g09d2 From b628b3e629b1436710e59a21cc020fbb04a52ce1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 23:32:58 -0500 Subject: tracing: make the set_event and available_events subsystem aware This patch makes the event files, set_event and available_events aware of the subsystem. Now you can enable an entire subsystem with: echo 'irq:*' > set_event Note: the '*' is not needed. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 19332200c45..b811eb34352 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -12,6 +12,8 @@ #include "trace_events.h" +#define TRACE_SYSTEM "TRACE_SYSTEM" + #define events_for_each(event) \ for (event = __start_ftrace_events; \ (unsigned long)event < (unsigned long)__stop_ftrace_events; \ @@ -45,14 +47,47 @@ static void ftrace_clear_events(void) static int ftrace_set_clr_event(char *buf, int set) { struct ftrace_event_call *call = __start_ftrace_events; + char *event = NULL, *sub = NULL, *match; + int ret = -EINVAL; + + /* + * The buf format can be : + * *: means any event by that name. + * : is the same. + * + * :* means all events in that subsystem + * : means the same. + * + * (no ':') means all events in a subsystem with + * the name or any event that matches + */ + + match = strsep(&buf, ":"); + if (buf) { + sub = match; + event = buf; + match = NULL; + if (!strlen(sub) || strcmp(sub, "*") == 0) + sub = NULL; + if (!strlen(event) || strcmp(event, "*") == 0) + event = NULL; + } events_for_each(call) { if (!call->name) continue; - if (strcmp(buf, call->name) != 0) + if (match && + strcmp(match, call->name) != 0 && + strcmp(match, call->system) != 0) + continue; + + if (sub && strcmp(sub, call->system) != 0) + continue; + + if (event && strcmp(event, call->name) != 0) continue; if (set) { @@ -68,9 +103,9 @@ static int ftrace_set_clr_event(char *buf, int set) call->enabled = 0; call->unregfunc(); } - return 0; + ret = 0; } - return -EINVAL; + return ret; } /* 128 should be much more than enough */ @@ -200,6 +235,8 @@ static int t_show(struct seq_file *m, void *v) { struct ftrace_event_call *call = v; + if (strcmp(call->system, TRACE_SYSTEM) != 0) + seq_printf(m, "%s:", call->system); seq_printf(m, "%s\n", call->name); return 0; -- cgit v1.2.3-70-g09d2 From c32e827b25054cb17b79cf97fb5e63ae4ce2223c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:12:30 -0500 Subject: tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/events.c | 6 +- kernel/trace/trace.h | 19 ++++ kernel/trace/trace_events.c | 2 +- kernel/trace/trace_events.h | 57 ---------- kernel/trace/trace_events_stage_1.h | 34 ++++++ kernel/trace/trace_events_stage_2.h | 72 ++++++++++++ kernel/trace/trace_events_stage_3.h | 219 ++++++++++++++++++++++++++++++++++++ 7 files changed, 350 insertions(+), 59 deletions(-) delete mode 100644 kernel/trace/trace_events.h create mode 100644 kernel/trace/trace_events_stage_1.h create mode 100644 kernel/trace/trace_events_stage_2.h create mode 100644 kernel/trace/trace_events_stage_3.h (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 4e4e45860c5..f2509cbaace 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -8,6 +8,10 @@ #include -#include "trace_events.h" +#include "trace_output.h" + +#include "trace_events_stage_1.h" +#include "trace_events_stage_2.h" +#include "trace_events_stage_3.h" #include diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index adf161f6dd1..aa1ab0cb80a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,4 +726,23 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + struct dentry *raw_dir; + int raw_enabled; + int (*raw_init)(void); + int (*raw_reg)(void); + void (*raw_unreg)(void); +}; + +void event_trace_printk(unsigned long ip, const char *fmt, ...); +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b811eb34352..77a5c02bd63 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -10,7 +10,7 @@ #include #include -#include "trace_events.h" +#include "trace.h" #define TRACE_SYSTEM "TRACE_SYSTEM" diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h deleted file mode 100644 index b015d7b1987..00000000000 --- a/kernel/trace/trace_events.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef _LINUX_KERNEL_TRACE_EVENTS_H -#define _LINUX_KERNEL_TRACE_EVENTS_H - -#include -#include -#include "trace.h" - -struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); -}; - - -#undef TPFMT -#define TPFMT(fmt, args...) fmt "\n", ##args - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (!ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = STR(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ -} - -void event_trace_printk(unsigned long ip, const char *fmt, ...); -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; - -#endif /* _LINUX_KERNEL_TRACE_EVENTS_H */ diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h new file mode 100644 index 00000000000..fd3bf9382d3 --- /dev/null +++ b/kernel/trace/trace_events_stage_1.h @@ -0,0 +1,34 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in to include the following: + * + * struct ftrace_raw_ { + * struct trace_entry ent; + * ; + * [...] + * }; + * + * The is created by the TRACE_FIELD(type, item, assign) + * macro. We simply do "type item;", and that will create the fields + * in the structure. + */ + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#define TRACE_FIELD(type, item, assign) \ + type item; + +#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h new file mode 100644 index 00000000000..3eaaef5f19e --- /dev/null +++ b/kernel/trace/trace_events_stage_2.h @@ -0,0 +1,72 @@ +/* + * Stage 2 of the trace events. + * + * Override the macros in to include the following: + * + * enum print_line_t + * ftrace_raw_output_(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_ *field; <-- defined in stage 1 + * struct trace_entry *entry; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * ret = trace_seq_printf(s, "%s", "\n"); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + field->item, + + +#undef TPRAWFMT +#define TPRAWFMT(args...) args + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h new file mode 100644 index 00000000000..7a161c49deb --- /dev/null +++ b/kernel/trace/trace_events_stage_3.h @@ -0,0 +1,219 @@ +/* + * Stage 3 of the trace events. + * + * Override the macros in to include the following: + * + * static void ftrace_event_(proto) + * { + * event_trace_printk(_RET_IP_, "() " ); + * } + * + * static int ftrace_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_event_); + * } + * + * For those macros defined with TRACE_FORMAT: + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * } + * + * + * For those macros defined with TRACE_EVENT_FORMAT: + * + * static struct ftrace_event_call event_; + * + * static void ftrace_raw_event_(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the TRACE_FIELD. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_raw_event_); + * } + * + * static struct trace_event ftrace_event_type_ = { + * .trace = ftrace_raw_output_, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * .raw_init = ftrace_raw_init_event_, + * .raw_reg = ftrace_raw_reg_event_, + * .raw_unreg = ftrace_raw_unreg_event_, + * } + * + */ + +#undef TPFMT +#define TPFMT(fmt, args...) fmt "\n", ##args + +#define _TRACE_FORMAT(call, proto, args, fmt) \ +static void ftrace_event_##call(proto) \ +{ \ + event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ +} \ + \ +static int ftrace_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_event_##call); \ + if (!ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call); \ + return ret; \ +} \ + \ +static void ftrace_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_event_##call); \ +} \ + + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = STR(TRACE_SYSTEM), \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ +} + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(struct ftrace_raw_##call), \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + tstruct; \ + \ + trace_current_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (!ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = STR(TRACE_SYSTEM), \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .raw_reg = ftrace_raw_reg_event_##call, \ + .raw_unreg = ftrace_raw_unreg_event_##call, \ +} -- cgit v1.2.3-70-g09d2 From fd99498989f3b3feeab89dcadf537138ba136d24 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 28 Feb 2009 02:41:25 -0500 Subject: tracing: add raw fast tracing interface for trace events This patch adds the interface to enable the C style trace points. In the directory /debugfs/tracing/events/subsystem/event We now have three files: enable : values 0 or 1 to enable or disable the trace event. available_types: values 'raw' and 'printf' which indicate the tracing types available for the trace point. If a developer does not use the TRACE_EVENT_FORMAT macro and just uses the TRACE_FORMAT macro, then only 'printf' will be available. This file is read only. type: values 'raw' or 'printf'. This indicates which type of tracing is active for that trace point. 'printf' is the default and if 'raw' is not available, this file is read only. # echo raw > /debug/tracing/events/sched/sched_wakeup/type # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable Will enable the C style tracing for the sched_wakeup trace point. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 7 ++ kernel/trace/trace_events.c | 199 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 181 insertions(+), 25 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index aa1ab0cb80a..f6fa0b9f83a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,6 +726,12 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* trace event type bit fields, not numeric */ +enum { + TRACE_EVENT_TYPE_PRINTF = 1, + TRACE_EVENT_TYPE_RAW = 2, +}; + struct ftrace_event_call { char *name; char *system; @@ -736,6 +742,7 @@ struct ftrace_event_call { int id; struct dentry *raw_dir; int raw_enabled; + int type; int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 77a5c02bd63..1d07f800a9c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -44,6 +44,36 @@ static void ftrace_clear_events(void) } } +static void ftrace_event_enable_disable(struct ftrace_event_call *call, + int enable) +{ + + switch (enable) { + case 0: + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + } + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + } + break; + case 1: + if (!call->enabled && + (call->type & TRACE_EVENT_TYPE_PRINTF)) { + call->enabled = 1; + call->regfunc(); + } + if (!call->raw_enabled && + (call->type & TRACE_EVENT_TYPE_RAW)) { + call->raw_enabled = 1; + call->raw_reg(); + } + break; + } +} + static int ftrace_set_clr_event(char *buf, int set) { struct ftrace_event_call *call = __start_ftrace_events; @@ -90,19 +120,8 @@ static int ftrace_set_clr_event(char *buf, int set) if (event && strcmp(event, call->name) != 0) continue; - if (set) { - /* Already set? */ - if (call->enabled) - return 0; - call->enabled = 1; - call->regfunc(); - } else { - /* Already cleared? */ - if (!call->enabled) - return 0; - call->enabled = 0; - call->unregfunc(); - } + ftrace_event_enable_disable(call, set); + ret = 0; } return ret; @@ -273,7 +292,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled) + if (call->enabled || call->raw_enabled) buf = "1\n"; else buf = "0\n"; @@ -304,18 +323,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: - if (!call->enabled) - break; - - call->enabled = 0; - call->unregfunc(); - break; case 1: - if (call->enabled) - break; - - call->enabled = 1; - call->regfunc(); + ftrace_event_enable_disable(call, val); break; default: @@ -327,6 +336,107 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +event_type_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + if (call->type & TRACE_EVENT_TYPE_PRINTF) + r += sprintf(buf, "printf\n"); + + if (call->type & TRACE_EVENT_TYPE_RAW) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64]; + + /* + * If there's only one type, we can't change it. + * And currently we always have printf type, and we + * may or may not have raw type. + * + * This is a redundant check, the file should be read + * only if this is the case anyway. + */ + + if (!call->raw_init) + return -EPERM; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (!strncmp(buf, "printf", 6) && + (!buf[6] || isspace(buf[6]))) { + + call->type = TRACE_EVENT_TYPE_PRINTF; + + /* + * If raw enabled, the disable it and enable + * printf type. + */ + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + + call->enabled = 1; + call->regfunc(); + } + + } else if (!strncmp(buf, "raw", 3) && + (!buf[3] || isspace(buf[3]))) { + + call->type = TRACE_EVENT_TYPE_RAW; + + /* + * If printf enabled, the disable it and enable + * raw type. + */ + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + + call->raw_enabled = 1; + call->raw_reg(); + } + } else + return -EINVAL; + + *ppos += cnt; + + return cnt; +} + +static ssize_t +event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + r += sprintf(buf, "printf\n"); + + if (call->raw_init) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -362,6 +472,17 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; +static const struct file_operations ftrace_type_fops = { + .open = tracing_open_generic, + .read = event_type_read, + .write = event_type_write, +}; + +static const struct file_operations ftrace_available_types_fops = { + .open = tracing_open_generic, + .read = event_available_types_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -427,6 +548,7 @@ static int event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) { struct dentry *entry; + int ret; /* * If the trace point header did not define TRACE_SYSTEM @@ -435,6 +557,18 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) if (strcmp(call->system, "TRACE_SYSTEM") != 0) d_events = event_subsystem_dir(call->system, d_events); + if (call->raw_init) { + ret = call->raw_init(); + if (ret < 0) { + pr_warning("Could not initialize trace point" + " events/%s\n", call->name); + return ret; + } + } + + /* default the output to printf */ + call->type = TRACE_EVENT_TYPE_PRINTF; + call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -448,6 +582,21 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) pr_warning("Could not create debugfs " "'%s/enable' entry\n", call->name); + /* Only let type be writable, if we can change it */ + entry = debugfs_create_file("type", + call->raw_init ? 0644 : 0444, + call->dir, call, + &ftrace_type_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/type' entry\n", call->name); + + entry = debugfs_create_file("available_types", 0444, call->dir, call, + &ftrace_available_types_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/type' available_types\n", call->name); + return 0; } -- cgit v1.2.3-70-g09d2 From 11a241a3302277db05561e01477528629d806c4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 11:49:04 -0500 Subject: tracing: add protection around modify trace event fields The trace event objects are currently not proctected against reentrancy. This patch adds a mutex around the modifications of the trace event fields. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d07f800a9c..26069fa6b3b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -14,6 +14,8 @@ #define TRACE_SYSTEM "TRACE_SYSTEM" +static DEFINE_MUTEX(event_mutex); + #define events_for_each(event) \ for (event = __start_ftrace_events; \ (unsigned long)event < (unsigned long)__stop_ftrace_events; \ @@ -104,6 +106,7 @@ static int ftrace_set_clr_event(char *buf, int set) event = NULL; } + mutex_lock(&event_mutex); events_for_each(call) { if (!call->name) @@ -124,6 +127,8 @@ static int ftrace_set_clr_event(char *buf, int set) ret = 0; } + mutex_unlock(&event_mutex); + return ret; } @@ -324,7 +329,9 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: case 1: + mutex_lock(&event_mutex); ftrace_event_enable_disable(call, val); + mutex_unlock(&event_mutex); break; default: -- cgit v1.2.3-70-g09d2 From 981d081ec8b958b7d962ee40d433581a55d40fc5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 13:53:59 -0500 Subject: tracing: add format file to describe event struct fields This patch adds the "format" file to the trace point event directory. This is based off of work by Tom Zanussi, in which a file is exported to be tread from user land such that a user space app may read the binary record stored in the ring buffer. # cat /debug/tracing/events/sched/sched_switch/format field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 56 ++++++++++++++++++++++++++++++++++++- kernel/trace/trace_events_stage_2.h | 52 ++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 2 ++ 4 files changed, 110 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cf6ba4181b1..e606633fb49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -754,6 +754,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); + int (*show_format)(struct trace_seq *s); }; void event_trace_printk(unsigned long ip, const char *fmt, ...); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 26069fa6b3b..d57a772981c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3,6 +3,9 @@ * * Copyright (C) 2008 Red Hat Inc, Steven Rostedt * + * - Added format output of fields of the trace point. + * This was based off of work by Tom Zanussi . + * */ #include @@ -444,6 +447,42 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +static ssize_t +event_format_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + char *buf; + int r; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + if (*ppos) + return 0; + + r = call->show_format(s); + if (!r) { + /* + * ug! The format output is bigger than a PAGE!! + */ + buf = "FORMAT TOO BIG\n"; + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, strlen(buf)); + goto out; + } + + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, s->len); + out: + kfree(s); + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -490,6 +529,11 @@ static const struct file_operations ftrace_available_types_fops = { .read = event_available_types_read, }; +static const struct file_operations ftrace_event_format_fops = { + .open = tracing_open_generic, + .read = event_format_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -602,7 +646,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) &ftrace_available_types_fops); if (!entry) pr_warning("Could not create debugfs " - "'%s/type' available_types\n", call->name); + "'%s/available_types' entry\n", call->name); + + /* A trace may not want to export its format */ + if (!call->show_format) + return 0; + + entry = debugfs_create_file("format", 0444, call->dir, call, + &ftrace_event_format_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/format' entry\n", call->name); return 0; } diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index dc79fe3a2ec..3a80ea4e92c 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -74,3 +74,55 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ } #include + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + return ret; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 2ab65e95822..c62a4d2a528 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -101,6 +101,7 @@ * .raw_init = ftrace_raw_init_event_, * .raw_reg = ftrace_raw_reg_event_, * .raw_unreg = ftrace_raw_unreg_event_, + * .show_format = ftrace_format_, * } * */ @@ -230,4 +231,5 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .raw_reg = ftrace_raw_reg_event_##call, \ .raw_unreg = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ } -- cgit v1.2.3-70-g09d2 From 91729ef96661bfa7dc53923746cd90b62d5495cc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 15:03:01 -0500 Subject: tracing: add ftrace headers to event format files This patch includes the ftrace header to the event formats files: # cat /debug/tracing/events/sched/sched_switch/format field:unsigned char type; offset:0; size:1; field:unsigned char flags; offset:1; size:1; field:unsigned char preempt_count; offset:2; size:1; field:int pid; offset:4; size:4; field:int tgid; offset:8; size:4; field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; A blank line is used as a deliminator between the ftrace header and the trace point fields. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d57a772981c..cdcc3aed76f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -13,7 +13,7 @@ #include #include -#include "trace.h" +#include "trace_output.h" #define TRACE_SYSTEM "TRACE_SYSTEM" @@ -447,6 +447,28 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +#undef FIELD +#define FIELD(type, name) \ + #type, #name, offsetof(typeof(field), name), sizeof(field.name) + +static int trace_write_header(struct trace_seq *s) +{ + struct trace_entry field; + + /* struct trace_entry */ + return trace_seq_printf(s, + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\n", + FIELD(unsigned char, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, tgid)); +} static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -465,6 +487,9 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; + /* If this fails, so will the show_format. */ + trace_write_header(s); + r = call->show_format(s); if (!r) { /* -- cgit v1.2.3-70-g09d2 From c5e4e19271edfdf1abd4184933d40d646da6a091 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 15:10:02 -0500 Subject: tracing: add trace name and id to event formats To be able to identify the trace in the binary format output, the id of the trace event (which is dynamically assigned) must also be listed. This patch adds the name of the trace point as well as the id assigned. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index cdcc3aed76f..210e71ff82d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -487,7 +487,11 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; - /* If this fails, so will the show_format. */ + /* If any of the first writes fail, so will the show_format. */ + + trace_seq_printf(s, "name: %s\n", call->name); + trace_seq_printf(s, "ID: %d\n", call->id); + trace_seq_printf(s, "format:\n"); trace_write_header(s); r = call->show_format(s); -- cgit v1.2.3-70-g09d2 From 770cb24345c0f6e0d47bd2b94aa6d67bea6f8b54 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Mar 2009 21:35:29 -0500 Subject: tracing: add format files for ftrace default entries Impact: allow user apps to read binary format of basic ftrace entries Currently, only defined raw events export their formats so a binary reader can parse them. There's no reason that the default ftrace entries can't export their formats. This patch adds a subsystem called "ftrace" in the events directory that includes the ftrace entries for basic ftrace recorded items. These only have three files in the events directory: type : printf available_types : printf format : format for the event entry For example: # cat /debug/tracing/events/ftrace/wakeup/format name: wakeup ID: 3 format: field:unsigned char type; offset:0; size:1; field:unsigned char flags; offset:1; size:1; field:unsigned char preempt_count; offset:2; size:1; field:int pid; offset:4; size:4; field:int tgid; offset:8; size:4; field:unsigned int prev_pid; offset:12; size:4; field:unsigned char prev_prio; offset:16; size:1; field:unsigned char prev_state; offset:17; size:1; field:unsigned int next_pid; offset:20; size:4; field:unsigned char next_prio; offset:24; size:1; field:unsigned char next_state; offset:25; size:1; field:unsigned int next_cpu; offset:28; size:4; print fmt: "%u:%u:%u ==+ %u:%u:%u [%03u]" Signed-off-by: Steven Rostedt --- kernel/trace/Makefile | 1 + kernel/trace/trace_event_types.h | 165 +++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events.c | 12 +-- kernel/trace/trace_export.c | 81 +++++++++++++++++++ kernel/trace/trace_format.h | 2 +- 5 files changed, 255 insertions(+), 6 deletions(-) create mode 100644 kernel/trace/trace_event_types.h create mode 100644 kernel/trace/trace_export.c (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c931fe0560c..f44736c7574 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -41,5 +41,6 @@ obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_EVENT_TRACER) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o +obj-$(CONFIG_EVENT_TRACER) += trace_export.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h new file mode 100644 index 00000000000..fb4eba16643 --- /dev/null +++ b/kernel/trace/trace_event_types.h @@ -0,0 +1,165 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ftrace + +/* + * We cheat and use the proto type field as the ID + * and args as the entry type (minus 'struct') + */ +TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ip, ip) + TRACE_FIELD(unsigned long, parent_ip, parent_ip) + ), + TPRAWFMT(" %lx <-- %lx") +); + +TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, + ftrace_graph_ent_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, graph_ent.func, func) + TRACE_FIELD(int, graph_ent.depth, depth) + ), + TPRAWFMT("--> %lx (%d)") +); + +TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, + ftrace_graph_ret_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ret.func, func) + TRACE_FIELD(int, ret.depth, depth) + ), + TPRAWFMT("<-- %lx (%d)") +); + +TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned int, prev_pid, prev_pid) + TRACE_FIELD(unsigned char, prev_prio, prev_prio) + TRACE_FIELD(unsigned char, prev_state, prev_state) + TRACE_FIELD(unsigned int, next_pid, next_pid) + TRACE_FIELD(unsigned char, next_prio, next_prio) + TRACE_FIELD(unsigned char, next_state, next_state) + TRACE_FIELD(unsigned int, next_cpu, next_cpu) + ), + TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") +); + +TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned int, prev_pid, prev_pid) + TRACE_FIELD(unsigned char, prev_prio, prev_prio) + TRACE_FIELD(unsigned char, prev_state, prev_state) + TRACE_FIELD(unsigned int, next_pid, next_pid) + TRACE_FIELD(unsigned char, next_prio, next_prio) + TRACE_FIELD(unsigned char, next_state, next_state) + TRACE_FIELD(unsigned int, next_cpu, next_cpu) + ), + TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") +); + +TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, arg1, arg1) + TRACE_FIELD(unsigned long, arg2, arg2) + TRACE_FIELD(unsigned long, arg3, arg3) + ), + TPRAWFMT("(%08lx) (%08lx) (%08lx)") +); + +/* + * Stack-trace entry: + */ + +/* #define FTRACE_STACK_ENTRIES 8 */ + +TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, caller[0], stack0) + TRACE_FIELD(unsigned long, caller[1], stack1) + TRACE_FIELD(unsigned long, caller[2], stack2) + TRACE_FIELD(unsigned long, caller[3], stack3) + TRACE_FIELD(unsigned long, caller[4], stack4) + TRACE_FIELD(unsigned long, caller[5], stack5) + TRACE_FIELD(unsigned long, caller[6], stack6) + TRACE_FIELD(unsigned long, caller[7], stack7) + ), + TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") +); + +TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, caller[0], stack0) + TRACE_FIELD(unsigned long, caller[1], stack1) + TRACE_FIELD(unsigned long, caller[2], stack2) + TRACE_FIELD(unsigned long, caller[3], stack3) + TRACE_FIELD(unsigned long, caller[4], stack4) + TRACE_FIELD(unsigned long, caller[5], stack5) + TRACE_FIELD(unsigned long, caller[6], stack6) + TRACE_FIELD(unsigned long, caller[7], stack7) + ), + TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") +); + +TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ip, ip) + TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD_ZERO_CHAR(buf) + ), + TPRAWFMT("%08lx (%d) %s") +); + +TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned int, line, line) + TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) + TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) + TRACE_FIELD(char, correct, correct) + ), + TPRAWFMT("%u:%s:%s (%u)") +); + +TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(u64, from, from) + TRACE_FIELD(u64, to, to) + ), + TPRAWFMT("from: %llx to: %llx") +); + +TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, + TRACE_STRUCT( + TRACE_FIELD(ktime_t, state_data.stamp, stamp) + TRACE_FIELD(ktime_t, state_data.end, end) + TRACE_FIELD(int, state_data.type, type) + TRACE_FIELD(int, state_data.state, state) + ), + TPRAWFMT("%llx->%llx type:%u state:%u") +); + +TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) + TRACE_FIELD(unsigned long, call_site, call_site) + TRACE_FIELD(const void *, ptr, ptr) + TRACE_FIELD(size_t, bytes_req, bytes_req) + TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) + TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) + TRACE_FIELD(int, node, node) + ), + TPRAWFMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + " flags:%x node:%d") +); + +TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) + TRACE_FIELD(unsigned long, call_site, call_site) + TRACE_FIELD(const void *, ptr, ptr) + ), + TPRAWFMT("type:%u call_site:%lx ptr:%p") +); + +#undef TRACE_SYSTEM diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 210e71ff82d..4488d90e75e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -656,11 +656,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return -1; } - entry = debugfs_create_file("enable", 0644, call->dir, call, - &ftrace_enable_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/enable' entry\n", call->name); + if (call->regfunc) { + entry = debugfs_create_file("enable", 0644, call->dir, call, + &ftrace_enable_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/enable' entry\n", call->name); + } /* Only let type be writable, if we can change it */ entry = debugfs_create_file("type", diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c new file mode 100644 index 00000000000..0fb7be73e31 --- /dev/null +++ b/kernel/trace/trace_export.c @@ -0,0 +1,81 @@ +/* + * trace_export.c - export basic ftrace utilities to user space + * + * Copyright (C) 2009 Steven Rostedt + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace_output.h" + +#include "trace_format.h" + +#undef TRACE_FIELD_ZERO_CHAR +#define TRACE_FIELD_ZERO_CHAR(item) \ + ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + "offset:%lu;\tsize:0;\n", \ + offsetof(typeof(field), item)); \ + if (!ret) \ + return 0; + + +#undef TPRAWFMT +#define TPRAWFMT(args...) args + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct args field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ + return ret; \ +} + +#include "trace_event_types.h" + +#undef TRACE_ZERO_CHAR +#define TRACE_ZERO_CHAR(arg) + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TPCMD +#define TPCMD(cmd...) cmd + +#undef TRACE_ENTRY +#define TRACE_ENTRY entry + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + cmd; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .id = proto, \ + .system = __stringify(TRACE_SYSTEM), \ + .show_format = ftrace_format_##call, \ +} +#include "trace_event_types.h" diff --git a/kernel/trace/trace_format.h b/kernel/trace/trace_format.h index 53a6b135711..03f9a4c165c 100644 --- a/kernel/trace/trace_format.h +++ b/kernel/trace/trace_format.h @@ -40,7 +40,7 @@ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int \ +static int \ ftrace_format_##call(struct trace_seq *s) \ { \ struct ftrace_raw_##call field; \ -- cgit v1.2.3-70-g09d2 From 156b5f172a64103bcb13b6d26288388b9019caa3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Mar 2009 10:50:53 -0500 Subject: tracing: typecast sizeof and offsetof to unsigned int Impact: fix compiler warnings On x86_64 sizeof and offsetof are treated as long, where as on x86_32 they are int. This patch typecasts them to unsigned int to avoid one arch giving warnings while the other does not. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 15 ++++++++------- kernel/trace/trace_export.c | 10 +++++----- kernel/trace/trace_format.h | 12 ++++++------ 3 files changed, 19 insertions(+), 18 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4488d90e75e..fa32ca32076 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -448,8 +448,9 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, } #undef FIELD -#define FIELD(type, name) \ - #type, #name, offsetof(typeof(field), name), sizeof(field.name) +#define FIELD(type, name) \ + #type, #name, (unsigned int)offsetof(typeof(field), name), \ + (unsigned int)sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -457,11 +458,11 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" "\n", FIELD(unsigned char, type), FIELD(unsigned char, flags), diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 0fb7be73e31..7162ab49d05 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -18,11 +18,11 @@ #include "trace_format.h" #undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ - "offset:%lu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ - if (!ret) \ +#define TRACE_FIELD_ZERO_CHAR(item) \ + ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + "offset:%u;\tsize:0;\n", \ + (unsigned int)offsetof(typeof(field), item)); \ + if (!ret) \ return 0; diff --git a/kernel/trace/trace_format.h b/kernel/trace/trace_format.h index 03f9a4c165c..97e59a9c82e 100644 --- a/kernel/trace/trace_format.h +++ b/kernel/trace/trace_format.h @@ -22,9 +22,9 @@ #undef TRACE_FIELD #define TRACE_FIELD(type, item, assign) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%lu;\tsize:%lu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ if (!ret) \ return 0; @@ -32,9 +32,9 @@ #undef TRACE_FIELD_SPECIAL #define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%lu;\tsize:%lu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ if (!ret) \ return 0; -- cgit v1.2.3-70-g09d2 From da4d03020c2af32f73e8bfbab0a66620d85bb9bb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Mar 2009 17:14:30 -0400 Subject: tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 + include/trace/sched_event_types.h | 48 +++++++---- kernel/trace/trace.h | 5 -- kernel/trace/trace_event_types.h | 3 +- kernel/trace/trace_events.c | 159 +----------------------------------- kernel/trace/trace_events_stage_1.h | 28 ++++--- kernel/trace/trace_events_stage_2.h | 89 ++++++++++++++++---- kernel/trace/trace_events_stage_3.h | 34 +++----- kernel/trace/trace_export.c | 23 +++++- 9 files changed, 159 insertions(+), 233 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3bcc3e17144..6b4f1bb3701 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -160,4 +160,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) +#define TRACE_EVENT(name, proto, args, struct, print, assign) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index 71b14828a95..aa77fb75403 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -62,25 +62,41 @@ TRACE_EVENT_FORMAT(sched_wakeup_new, TP_RAW_FMT("task %d success=%d") ); -TRACE_EVENT_FORMAT(sched_switch, +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next), + TP_ARGS(rq, prev, next), - TP_FMT("task %s:%d ==> %s:%d", - prev->comm, prev->pid, next->comm, next->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, prev_pid, prev->pid) - TRACE_FIELD(int, prev_prio, prev->prio) - TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], - next_comm, - TP_CMD(memcpy(TRACE_ENTRY->next_comm, - next->comm, - TASK_COMM_LEN))) - TRACE_FIELD(pid_t, next_pid, next->pid) - TRACE_FIELD(int, next_prio, next->prio) + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) ), - TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") - ); + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ) +); TRACE_EVENT_FORMAT(sched_migrate_task, TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2bfb7d11fc1..c5e1d8865fe 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -751,12 +751,7 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; - struct dentry *raw_dir; - int raw_enabled; - int type; int (*raw_init)(void); - int (*raw_reg)(void); - void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index d94179aa1fc..5cca4c978bd 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -106,9 +106,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), - TP_RAW_FMT("%08lx (%d) %s") + TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fa32ca32076..1880a643809 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -59,22 +59,12 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, call->enabled = 0; call->unregfunc(); } - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - } break; case 1: - if (!call->enabled && - (call->type & TRACE_EVENT_TYPE_PRINTF)) { + if (!call->enabled) { call->enabled = 1; call->regfunc(); } - if (!call->raw_enabled && - (call->type & TRACE_EVENT_TYPE_RAW)) { - call->raw_enabled = 1; - call->raw_reg(); - } break; } } @@ -300,7 +290,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled || call->raw_enabled) + if (call->enabled) buf = "1\n"; else buf = "0\n"; @@ -346,107 +336,6 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -event_type_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - if (call->type & TRACE_EVENT_TYPE_PRINTF) - r += sprintf(buf, "printf\n"); - - if (call->type & TRACE_EVENT_TYPE_RAW) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[64]; - - /* - * If there's only one type, we can't change it. - * And currently we always have printf type, and we - * may or may not have raw type. - * - * This is a redundant check, the file should be read - * only if this is the case anyway. - */ - - if (!call->raw_init) - return -EPERM; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - if (!strncmp(buf, "printf", 6) && - (!buf[6] || isspace(buf[6]))) { - - call->type = TRACE_EVENT_TYPE_PRINTF; - - /* - * If raw enabled, the disable it and enable - * printf type. - */ - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - - call->enabled = 1; - call->regfunc(); - } - - } else if (!strncmp(buf, "raw", 3) && - (!buf[3] || isspace(buf[3]))) { - - call->type = TRACE_EVENT_TYPE_RAW; - - /* - * If printf enabled, the disable it and enable - * raw type. - */ - if (call->enabled) { - call->enabled = 0; - call->unregfunc(); - - call->raw_enabled = 1; - call->raw_reg(); - } - } else - return -EINVAL; - - *ppos += cnt; - - return cnt; -} - -static ssize_t -event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - r += sprintf(buf, "printf\n"); - - if (call->raw_init) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - #undef FIELD #define FIELD(type, name) \ #type, #name, (unsigned int)offsetof(typeof(field), name), \ @@ -470,6 +359,7 @@ static int trace_write_header(struct trace_seq *s) FIELD(int, pid), FIELD(int, tgid)); } + static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -527,13 +417,6 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; -static const struct file_operations ftrace_avail_fops = { - .open = ftrace_event_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, @@ -548,17 +431,6 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; -static const struct file_operations ftrace_type_fops = { - .open = tracing_open_generic, - .read = event_type_read, - .write = event_type_write, -}; - -static const struct file_operations ftrace_available_types_fops = { - .open = tracing_open_generic, - .read = event_available_types_read, -}; - static const struct file_operations ftrace_event_format_fops = { .open = tracing_open_generic, .read = event_format_read, @@ -647,9 +519,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } - /* default the output to printf */ - call->type = TRACE_EVENT_TYPE_PRINTF; - call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -665,21 +534,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } - /* Only let type be writable, if we can change it */ - entry = debugfs_create_file("type", - call->raw_init ? 0644 : 0444, - call->dir, call, - &ftrace_type_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/type' entry\n", call->name); - - entry = debugfs_create_file("available_types", 0444, call->dir, call, - &ftrace_available_types_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/available_types' entry\n", call->name); - /* A trace may not want to export its format */ if (!call->show_format) return 0; @@ -704,13 +558,6 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, - (void *)&show_event_seq_ops, - &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); - entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 3830a731424..edfcbd3a0d1 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -18,19 +18,23 @@ #define TRACE_FORMAT(call, proto, args, fmt) #undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) + +#undef __array +#define __array(type, item, len) type item[len]; -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __field +#define __field(type, item) type item; -#define TRACE_FIELD(type, item, assign) \ - type item; -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - type_item; +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name #include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 8e2e0f56c2a..d91bf4c5666 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -32,23 +32,14 @@ * in binary. */ -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __entry +#define __entry field -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - field->item, +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - field->item, - - -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -66,14 +57,76 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ - ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + ret = trace_seq_printf(s, print); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + #include -#include "trace_format.h" +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry "REC" + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 41b82b93c9c..8e398d86409 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -144,27 +144,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TP_CMD -#define TP_CMD(cmd...) cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, raw) \ + TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - cmd; +#undef __entry +#define __entry entry -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ \ static struct ftrace_event_call event_##call; \ \ @@ -185,7 +173,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - tstruct; \ + assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ } \ @@ -226,10 +214,8 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ .raw_init = ftrace_raw_init_event_##call, \ - .raw_reg = ftrace_raw_reg_event_##call, \ - .raw_unreg = ftrace_raw_unreg_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index e62bc10f810..23ae78430d5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,7 +15,28 @@ #include "trace_output.h" -#include "trace_format.h" + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ -- cgit v1.2.3-70-g09d2 From ce8eb2bf05042452107e489782105d2e235cbdd0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 10:14:35 -0400 Subject: tracing: fix printk format specifier Impact: clean up The offsetof and sizeof are of type size_t, and instead of typecasting them to unsigned int for printk formatting, one could just use %zu. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1880a643809..a0b41cc26f2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -338,8 +338,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, #undef FIELD #define FIELD(type, name) \ - #type, #name, (unsigned int)offsetof(typeof(field), name), \ - (unsigned int)sizeof(field.name) + #type, #name, offsetof(typeof(field), name), sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -347,11 +346,11 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", FIELD(unsigned char, type), FIELD(unsigned char, flags), -- cgit v1.2.3-70-g09d2 From 40e26815fafd3b8c4aced17b1f22e68ef33eb8db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 11:32:40 -0400 Subject: tracing: do not allow modifying the ftrace events via the event files Impact: fix to prevent crash on calling NULL function pointer The ftrace internal records have their format exported via the event system under the ftrace subsystem. These are only for exporting the format to allow binary readers to be able to parse them in a binary output. The ftrace subsystem events can only be enabled via the ftrace tracers and do not have a registering function. The event files expect the event record to have registering function and will call it directly. Passing in a ftrace subsystem event will cause the kernel to crash because it will execute a NULL pointer. This patch prevents the ftrace subsystem from being viewable to the event enabling files. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a0b41cc26f2..85ec10fbb38 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -102,7 +102,7 @@ static int ftrace_set_clr_event(char *buf, int set) mutex_lock(&event_mutex); events_for_each(call) { - if (!call->name) + if (!call->name || !call->regfunc) continue; if (match && @@ -207,8 +207,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) - return NULL; + for (;;) { + if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + return NULL; + + /* + * The ftrace subsystem is for showing formats only. + * They can not be enabled or disabled via the event files. + */ + if (call->regfunc) + break; + + call++; + next = call; + } m->private = ++next; -- cgit v1.2.3-70-g09d2 From 2314c4ae1461c9e8b26cf8b9a851f280bc5769e1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Mar 2009 12:04:02 -0400 Subject: tracing: add back the available_events file The event directory files type and available_types were no longer needed with the new TRACE_EVENT_FORMAT macros, they were deleted. But by accident the available_events file was also removed. This patch brings it back. Reported-by: KOSAKI Motohiro Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 85ec10fbb38..769dfd00fc8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -428,6 +428,13 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; +static const struct file_operations ftrace_avail_fops = { + .open = ftrace_event_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, @@ -569,6 +576,13 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; + entry = debugfs_create_file("available_events", 0444, d_tracer, + (void *)&show_event_seq_ops, + &ftrace_avail_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'available_events' entry\n"); + entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); -- cgit v1.2.3-70-g09d2 From 1852fcce181faa237c010a3dbedb473cf9d4555f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 14:33:00 -0400 Subject: tracing: expand the ring buffers when an event is activated To save memory, the tracer ring buffers are set to a minimum. The activating of a trace expands the ring buffer size. This patch adds this expanding, when an event is activated. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 20 ++++++++++++++++++++ kernel/trace/trace.h | 3 +++ kernel/trace/trace_events.c | 8 ++++++++ 3 files changed, 31 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c1dc185085..35ee63ae412 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2357,6 +2357,26 @@ static int tracing_resize_ring_buffer(unsigned long size) return ret; } +/** + * tracing_update_buffers - used by tracing facility to expand ring buffers + * + * To save on memory when the tracing is never used on a system with it + * configured in. The ring buffers are set to a minimum size. But once + * a user starts to use the tracing facility, then they need to grow + * to their default size. + * + * This function is to be called when a tracer is about to be used. + */ +int tracing_update_buffers(void) +{ + int ret = 0; + + if (!ring_buffer_expanded) + ret = tracing_resize_ring_buffer(trace_buf_size); + + return ret; +} + struct trace_option_dentry; static struct trace_option_dentry * diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe..336324d717f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -737,6 +737,9 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* set ring buffers to default size if not already done so */ +int tracing_update_buffers(void); + /* trace event type bit fields, not numeric */ enum { TRACE_EVENT_TYPE_PRINTF = 1, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 769dfd00fc8..ca624df7359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -141,6 +141,10 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (!cnt || cnt < 0) return 0; + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + ret = get_user(ch, ubuf++); if (ret) return ret; @@ -331,6 +335,10 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret < 0) return ret; + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + switch (val) { case 0: case 1: -- cgit v1.2.3-70-g09d2 From e9fb2b6d5845e24f104713591286b6f39761c027 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 14:19:25 -0400 Subject: tracing: have event_trace_printk use static tracer Impact: speed up on event tracing The event_trace_printk is currently a wrapper function that calls trace_vprintk. Because it uses a variable for the fmt it misses out on the optimization of using the binary printk. This patch makes event_trace_printk into a macro wrapper to use the fmt as the same as the trace_printks. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 17 +++++++++++++++++ kernel/trace/trace_events.c | 10 ---------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cede1ab49d0..35cfa7bbaf3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -773,4 +773,21 @@ void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +extern const char *__start___trace_bprintk_fmt[]; +extern const char *__stop___trace_bprintk_fmt[]; + +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ca624df7359..238ea95a411 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -24,16 +24,6 @@ static DEFINE_MUTEX(event_mutex); (unsigned long)event < (unsigned long)__stop_ftrace_events; \ event++) -void event_trace_printk(unsigned long ip, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - tracing_record_cmdline(current); - trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); - va_end(ap); -} - static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; -- cgit v1.2.3-70-g09d2 From c269fc8c537d761f36cb98e637ae934d9331a9d5 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 17 Mar 2009 01:20:59 -0500 Subject: tracing: fix leak in event_format_read() Impact: fix memory leak If event_format_read() exits early due to nonzero ppos, the previous kmalloc doesn't get freed - might as well do the check before the kmalloc and avoid the problem. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237270859.8033.141.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 238ea95a411..c88227b3b9d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -378,15 +378,15 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, char *buf; int r; + if (*ppos) + return 0; + s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; trace_seq_init(s); - if (*ppos) - return 0; - /* If any of the first writes fail, so will the show_format. */ trace_seq_printf(s, "name: %s\n", call->name); -- cgit v1.2.3-70-g09d2 From 23725aeeab10ba02bcf10ec49ad73146b54cb52f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:13 +0100 Subject: ftrace: provide an id file for each event Since not every event has a format file to read the id from, expose it explicitly in a separate file. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <20090319194233.372534033@chello.nl> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c88227b3b9d..7763db8fd0b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -412,6 +412,29 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, return r; } +static ssize_t +event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + trace_seq_printf(s, "%d\n", call->id); + + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, s->len); + kfree(s); + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -452,6 +475,11 @@ static const struct file_operations ftrace_event_format_fops = { .read = event_format_read, }; +static const struct file_operations ftrace_event_id_fops = { + .open = tracing_open_generic, + .read = event_id_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -550,6 +578,14 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } + if (call->id) { + entry = debugfs_create_file("id", 0444, call->dir, call, + &ftrace_event_id_fops); + if (!entry) + pr_warning("Could not create debugfs '%s/id' entry\n", + call->name); + } + /* A trace may not want to export its format */ if (!call->show_format) return 0; -- cgit v1.2.3-70-g09d2 From ac199db0189c091f2863312061c0575937f68810 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:15 +0100 Subject: ftrace: event profile hooks Impact: new tracing infrastructure feature Provide infrastructure to generate software perf counter events from tracepoints. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <20090319194233.557364871@chello.nl> Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/events.c | 1 - kernel/trace/trace.h | 11 ++++++++++ kernel/trace/trace_event_profile.c | 31 ++++++++++++++++++++++++++ kernel/trace/trace_events.c | 9 ++------ kernel/trace/trace_events_stage_3.h | 44 +++++++++++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 kernel/trace/trace_event_profile.c (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c3feea01c3e..0e45c206c2f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -44,5 +44,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o +obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o libftrace-y := ftrace.o diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 9fc918da404..246f2aa6dc4 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -12,4 +12,3 @@ #include "trace_events_stage_2.h" #include "trace_events_stage_3.h" -#include diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7c9a0cbf5dc..7cfb741be20 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -785,12 +785,23 @@ struct ftrace_event_call { int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif }; void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +#define for_each_event(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c new file mode 100644 index 00000000000..22cba997077 --- /dev/null +++ b/kernel/trace/trace_event_profile.c @@ -0,0 +1,31 @@ +/* + * trace event based perf counter profiling + * + * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra + * + */ + +#include "trace.h" + +int ftrace_profile_enable(int event_id) +{ + struct ftrace_event_call *event; + + for_each_event(event) { + if (event->id == event_id) + return event->profile_enable(event); + } + + return -EINVAL; +} + +void ftrace_profile_disable(int event_id) +{ + struct ftrace_event_call *event; + + for_each_event(event) { + if (event->id == event_id) + return event->profile_disable(event); + } +} + diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7763db8fd0b..3047b56f663 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,11 +19,6 @@ static DEFINE_MUTEX(event_mutex); -#define events_for_each(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; @@ -90,7 +85,7 @@ static int ftrace_set_clr_event(char *buf, int set) } mutex_lock(&event_mutex); - events_for_each(call) { + for_each_event(call) { if (!call->name || !call->regfunc) continue; @@ -628,7 +623,7 @@ static __init int event_trace_init(void) if (!d_events) return 0; - events_for_each(call) { + for_each_event(call) { /* The linker may leave blanks */ if (!call->name) continue; diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 4c26d97b450..6b3261ca988 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -109,6 +109,40 @@ #undef TP_FMT #define TP_FMT(fmt, args...) fmt "\n", ##args +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +{ \ + if (atomic_add_negative(-1, &call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + #define _TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ { \ @@ -147,6 +181,7 @@ static int ftrace_init_event_##call(void) \ #undef TRACE_FORMAT #define TRACE_FORMAT(call, proto, args, fmt) \ _TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ @@ -155,6 +190,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_init_event_##call, \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ + _TRACE_PROFILE_INIT(call) \ } #undef __entry @@ -162,6 +198,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -227,4 +264,11 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ + _TRACE_PROFILE_INIT(call) \ } + +#include + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + -- cgit v1.2.3-70-g09d2 From cf027f645e6aee4f0ca6197a6b6a57f327fdb13f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:30:39 -0500 Subject: tracing: add run-time field descriptions for event filtering This patch makes the field descriptions defined for event tracing available at run-time, for the event-filtering mechanism introduced in a subsequent patch. The common event fields are prepended with 'common_' in the format display, allowing them to be distinguished from the other fields that might internally have same name and can therefore be unambiguously used in filters. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710639.7703.46.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 30 +++++++++++++++++-------- kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++++++++- kernel/trace/trace_events_stage_2.h | 45 +++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 2 ++ 4 files changed, 107 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be20..9288dc7ad14 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -775,16 +775,26 @@ enum { TRACE_EVENT_TYPE_RAW = 2, }; +struct ftrace_event_field { + struct list_head link; + char *name; + char *type; + int offset; + int size; +}; + struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -793,6 +803,8 @@ struct ftrace_event_call { #endif }; +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3047b56f663..961b057da28 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,6 +19,34 @@ static DEFINE_MUTEX(event_mutex); +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size) +{ + struct ftrace_event_field *field; + + field = kmalloc(sizeof(*field), GFP_KERNEL); + if (!field) + goto err; + field->name = kstrdup(name, GFP_KERNEL); + if (!field->name) + goto err; + field->type = kstrdup(type, GFP_KERNEL); + if (!field->type) + goto err; + field->offset = offset; + field->size = size; + list_add(&field->link, &call->fields); + + return 0; +err: + if (field) { + kfree(field->name); + kfree(field->type); + } + kfree(field); + return -ENOMEM; +} + static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; @@ -343,7 +371,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, #undef FIELD #define FIELD(type, name) \ - #type, #name, offsetof(typeof(field), name), sizeof(field.name) + #type, "common_" #name, offsetof(typeof(field), name), \ + sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -581,6 +610,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) call->name); } + if (call->define_fields) { + ret = call->define_fields(); + if (ret < 0) { + pr_warning("Could not initialize trace point" + " events/%s\n", call->name); + return ret; + } + } + /* A trace may not want to export its format */ if (!call->show_format) return 0; diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 5117c43f5c6..30743f7d411 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \ } #include + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6b3261ca988..468938f7014 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -252,6 +252,7 @@ static int ftrace_raw_init_event_##call(void) \ if (!id) \ return -ENODEV; \ event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ } \ \ @@ -264,6 +265,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ _TRACE_PROFILE_INIT(call) \ } -- cgit v1.2.3-70-g09d2 From 7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:04 -0500 Subject: tracing: add per-event filtering This patch adds per-event filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each event directory. This file can be written to to set filters; reading from it will display the current set of filters set for that event. Basically, any field listed in the 'format' file for an event can be filtered on (including strings, but not yet other array types) using either matching ('==') or non-matching ('!=') 'predicates'. A 'predicate' can be either a single expression: # echo pid != 0 > filter # cat filter pid != 0 or a compound expression of up to 8 sub-expressions combined using '&&' or '||': # echo comm == Xorg > filter # echo "&& sig != 29" > filter # cat filter comm == Xorg && sig != 29 Only events having field values matching an expression will be available in the trace output; non-matching events are discarded. Note that a compound expression is built up by echoing each sub-expression separately - it's not the most efficient way to do things, but it keeps the parser simple and assumes that compound expressions will be relatively uncommon. In any case, a subsequent patch introducing a way to set filters for entire subsystems should mitigate any need to do this for lots of events. Setting a filter without an '&&' or '||' clears the previous filter completely and sets the filter to the new expression: # cat filter comm == Xorg && sig != 29 # echo comm != Xorg # cat filter comm != Xorg To clear a filter, echo 0 to the filter file: # echo 0 > filter # cat filter none The limit of 8 predicates for a compound expression is arbitrary - for efficiency, it's implemented as an array of pointers to predicates, and 8 seemed more than enough for any filter... Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710665.7703.48.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/trace.h | 28 ++++ kernel/trace/trace_events.c | 77 +++++++++ kernel/trace/trace_events_filter.c | 326 ++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 4 + 5 files changed, 436 insertions(+) create mode 100644 kernel/trace/trace_events_filter.c (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e45c206c2f..2630f5121ec 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o +obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9288dc7ad14..d9eb39e4bb3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -795,6 +795,7 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; + struct filter_pred **preds; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -803,8 +804,35 @@ struct ftrace_event_call { #endif }; +#define MAX_FILTER_PRED 8 + +struct filter_pred; + +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); + +struct filter_pred { + filter_pred_fn_t fn; + u64 val; + char *str_val; + int str_len; + char *field_name; + int offset; + int not; + int or; + int compound; + int clear; +}; + int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); +extern void filter_free_pred(struct filter_pred *pred); +extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern int filter_parse(char **pbuf, struct filter_pred *pred); +extern int filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred); +extern void filter_free_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); + void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 961b057da28..97470c48956 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -459,6 +459,71 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return r; } +static ssize_t +event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + r = filter_print_preds(call->preds, s->buffer); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + + kfree(s); + + return r; +} + +static ssize_t +event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64], *pbuf = buf; + struct filter_pred *pred; + int err; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return -ENOMEM; + + err = filter_parse(&pbuf, pred); + if (err < 0) { + filter_free_pred(pred); + return err; + } + + if (pred->clear) { + filter_free_preds(call); + return cnt; + } + + if (filter_add_pred(call, pred)) { + filter_free_pred(pred); + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -504,6 +569,12 @@ static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, }; +static const struct file_operations ftrace_event_filter_fops = { + .open = tracing_open_generic, + .read = event_filter_read, + .write = event_filter_write, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -619,6 +690,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } + entry = debugfs_create_file("filter", 0444, call->dir, call, + &ftrace_event_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", call->name); + /* A trace may not want to export its format */ if (!call->show_format) return 0; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c new file mode 100644 index 00000000000..8e8c5fa25be --- /dev/null +++ b/kernel/trace/trace_events_filter.c @@ -0,0 +1,326 @@ +/* + * trace_events_filter - generic event filtering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2009 Tom Zanussi + */ + +#include +#include +#include +#include + +#include "trace.h" + +static int filter_pred_64(struct filter_pred *pred, void *event) +{ + u64 *addr = (u64 *)(event + pred->offset); + u64 val = (u64)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_32(struct filter_pred *pred, void *event) +{ + u32 *addr = (u32 *)(event + pred->offset); + u32 val = (u32)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_16(struct filter_pred *pred, void *event) +{ + u16 *addr = (u16 *)(event + pred->offset); + u16 val = (u16)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_8(struct filter_pred *pred, void *event) +{ + u8 *addr = (u8 *)(event + pred->offset); + u8 val = (u8)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_string(struct filter_pred *pred, void *event) +{ + char *addr = (char *)(event + pred->offset); + int cmp, match; + + cmp = strncmp(addr, pred->str_val, pred->str_len); + + match = (!cmp) ^ pred->not; + + return match; +} + +/* return 1 if event matches, 0 otherwise (discard) */ +int filter_match_preds(struct ftrace_event_call *call, void *rec) +{ + int i, matched, and_failed = 0; + struct filter_pred *pred; + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (call->preds[i]) { + pred = call->preds[i]; + if (and_failed && !pred->or) + continue; + matched = pred->fn(pred, rec); + if (!matched && !pred->or) { + and_failed = 1; + continue; + } else if (matched && pred->or) + return 1; + } else + break; + } + + if (and_failed) + return 0; + + return 1; +} + +int filter_print_preds(struct filter_pred **preds, char *buf) +{ + ssize_t this_len = 0; + char *field_name; + struct filter_pred *pred; + int i; + + if (!preds) { + this_len += sprintf(buf + this_len, "none\n"); + return this_len; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (preds[i]) { + pred = preds[i]; + field_name = pred->field_name; + if (i) + this_len += sprintf(buf + this_len, + pred->or ? "|| " : "&& "); + this_len += sprintf(buf + this_len, + "%s ", field_name); + this_len += sprintf(buf + this_len, + pred->not ? "!= " : "== "); + if (pred->str_val) + this_len += sprintf(buf + this_len, + "%s\n", pred->str_val); + else + this_len += sprintf(buf + this_len, + "%llu\n", pred->val); + } else + break; + } + + return this_len; +} + +static struct ftrace_event_field * +find_event_field(struct ftrace_event_call *call, char *name) +{ + struct ftrace_event_field *field; + struct list_head *entry, *tmp; + + list_for_each_safe(entry, tmp, &call->fields) { + field = list_entry(entry, struct ftrace_event_field, link); + if (!strcmp(field->name, name)) + return field; + } + + return NULL; +} + +void filter_free_pred(struct filter_pred *pred) +{ + if (!pred) + return; + + kfree(pred->field_name); + kfree(pred->str_val); + kfree(pred); +} + +void filter_free_preds(struct ftrace_event_call *call) +{ + int i; + + if (call->preds) { + for (i = 0; i < MAX_FILTER_PRED; i++) + filter_free_pred(call->preds[i]); + kfree(call->preds); + call->preds = NULL; + } +} + +static int __filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred) +{ + int i; + + if (call->preds && !pred->compound) + filter_free_preds(call); + + if (!call->preds) { + call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + GFP_KERNEL); + if (!call->preds) + return -ENOMEM; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (!call->preds[i]) { + call->preds[i] = pred; + return 0; + } + } + + return -ENOMEM; +} + +static int is_string_field(const char *type) +{ + if (strchr(type, '[') && strstr(type, "char")) + return 1; + + return 0; +} + +int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) +{ + struct ftrace_event_field *field; + + field = find_event_field(call, pred->field_name); + if (!field) + return -EINVAL; + + pred->offset = field->offset; + + if (is_string_field(field->type)) { + pred->fn = filter_pred_string; + pred->str_len = field->size; + return __filter_add_pred(call, pred); + } + + switch (field->size) { + case 8: + pred->fn = filter_pred_64; + break; + case 4: + pred->fn = filter_pred_32; + break; + case 2: + pred->fn = filter_pred_16; + break; + case 1: + pred->fn = filter_pred_8; + break; + default: + return -EINVAL; + } + + return __filter_add_pred(call, pred); +} + +int filter_parse(char **pbuf, struct filter_pred *pred) +{ + char *tmp, *tok, *val_str = NULL; + int tok_n = 0; + + /* field ==/!= number, or/and field ==/!= number, number */ + while ((tok = strsep(pbuf, " \n"))) { + if (tok_n == 0) { + if (!strcmp(tok, "0")) { + pred->clear = 1; + return 0; + } else if (!strcmp(tok, "&&")) { + pred->or = 0; + pred->compound = 1; + } else if (!strcmp(tok, "||")) { + pred->or = 1; + pred->compound = 1; + } else + pred->field_name = tok; + tok_n = 1; + continue; + } + if (tok_n == 1) { + if (!pred->field_name) + pred->field_name = tok; + else if (!strcmp(tok, "!=")) + pred->not = 1; + else if (!strcmp(tok, "==")) + pred->not = 0; + else { + pred->field_name = NULL; + return -EINVAL; + } + tok_n = 2; + continue; + } + if (tok_n == 2) { + if (pred->compound) { + if (!strcmp(tok, "!=")) + pred->not = 1; + else if (!strcmp(tok, "==")) + pred->not = 0; + else { + pred->field_name = NULL; + return -EINVAL; + } + } else { + val_str = tok; + break; /* done */ + } + tok_n = 3; + continue; + } + if (tok_n == 3) { + val_str = tok; + break; /* done */ + } + } + + pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); + if (!pred->field_name) + return -ENOMEM; + + pred->val = simple_strtoull(val_str, &tmp, 10); + if (tmp == val_str) { + pred->str_val = kstrdup(val_str, GFP_KERNEL); + if (!pred->str_val) + return -ENOMEM; + } + + return 0; +} + + diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 468938f7014..ebf215e87d5 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ + struct ftrace_event_call *call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ @@ -222,6 +223,9 @@ static void ftrace_raw_event_##call(proto) \ assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ + \ + if (call->preds && !filter_match_preds(call, entry)) \ + ring_buffer_event_discard(event); \ } \ \ static int ftrace_raw_reg_event_##call(void) \ -- cgit v1.2.3-70-g09d2 From cfb180f3e71b2a280a254c8646a9ab1beab63f84 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:17 -0500 Subject: tracing: add per-subsystem filtering This patch adds per-subsystem filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each subsystem directory. This file can be written to to set filters; reading from it will display the current set of filters set for that subsystem. Basically what it does is propagate the filter down to each event contained in the subsystem. If a particular event doesn't have a field with the name specified in the filter, it simply doesn't get set for that event. You can verify whether or not the filter was set for a particular event by looking at the filter file for that event. As with per-event filters, compound expressions are supported, echoing '0' to the subsystem's filter file clears all filters in the subsystem, etc. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710677.7703.49.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 15 +++++++ kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++++--- kernel/trace/trace_events_filter.c | 80 +++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d9eb39e4bb3..f267723c3c5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -804,6 +804,18 @@ struct ftrace_event_call { #endif }; +struct event_subsystem { + struct list_head list; + const char *name; + struct dentry *entry; + struct filter_pred **preds; +}; + +#define events_for_each(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + #define MAX_FILTER_PRED 8 struct filter_pred; @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_free_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern void filter_free_subsystem_preds(struct event_subsystem *system); +extern int filter_add_subsystem_pred(struct event_subsystem *system, + struct filter_pred *pred); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97470c48956..97d4daaddd9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct event_subsystem *system = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + r = filter_print_preds(system->preds, s->buffer); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + + kfree(s); + + return r; +} + +static ssize_t +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct event_subsystem *system = filp->private_data; + char buf[64], *pbuf = buf; + struct filter_pred *pred; + int err; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return -ENOMEM; + + err = filter_parse(&pbuf, pred); + if (err < 0) { + filter_free_pred(pred); + return err; + } + + if (pred->clear) { + filter_free_subsystem_preds(system); + return cnt; + } + + if (filter_add_subsystem_pred(system, pred)) { + filter_free_pred(pred); + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = { .write = event_filter_write, }; +static const struct file_operations ftrace_subsystem_filter_fops = { + .open = tracing_open_generic, + .read = subsystem_filter_read, + .write = subsystem_filter_write, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void) return d_events; } -struct event_subsystem { - struct list_head list; - const char *name; - struct dentry *entry; -}; - static LIST_HEAD(event_subsystems); static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; + struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->name = name; list_add(&system->list, &event_subsystems); + system->preds = NULL; + + entry = debugfs_create_file("filter", 0444, system->entry, system, + &ftrace_subsystem_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", name); + return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8e8c5fa25be..1ab20cee0e4 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call) } } +void filter_free_subsystem_preds(struct event_subsystem *system) +{ + struct ftrace_event_call *call = __start_ftrace_events; + int i; + + if (system->preds) { + for (i = 0; i < MAX_FILTER_PRED; i++) + filter_free_pred(system->preds[i]); + kfree(system->preds); + system->preds = NULL; + } + + events_for_each(call) { + if (!call->name || !call->regfunc) + continue; + + if (!strcmp(call->system, system->name)) + filter_free_preds(call); + } +} + static int __filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) { @@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) return __filter_add_pred(call, pred); } +static struct filter_pred *copy_pred(struct filter_pred *pred) +{ + struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); + if (!new_pred) + return NULL; + + memcpy(new_pred, pred, sizeof(*pred)); + if (pred->str_val) { + new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); + new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); + if (!new_pred->str_val) { + kfree(new_pred); + return NULL; + } + } + + return new_pred; +} + +int filter_add_subsystem_pred(struct event_subsystem *system, + struct filter_pred *pred) +{ + struct ftrace_event_call *call = __start_ftrace_events; + struct filter_pred *event_pred; + int i; + + if (system->preds && !pred->compound) + filter_free_subsystem_preds(system); + + if (!system->preds) { + system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + GFP_KERNEL); + if (!system->preds) + return -ENOMEM; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (!system->preds[i]) { + system->preds[i] = pred; + break; + } + if (i == MAX_FILTER_PRED - 1) + return -EINVAL; + } + + events_for_each(call) { + if (!call->name || !call->regfunc) + continue; + + if (!strcmp(call->system, system->name)) { + event_pred = copy_pred(pred); + if (event_pred) + filter_add_pred(call, event_pred); + } + } + + return 0; +} + int filter_parse(char **pbuf, struct filter_pred *pred) { char *tmp, *tok, *val_str = NULL; -- cgit v1.2.3-70-g09d2 From fe9f57f250ab4d781b99504caeb218ca2db14c1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Mar 2009 18:41:59 +0100 Subject: tracing: add run-time field descriptions for event filtering, kfree fix Impact: fix potential kfree of random data in (rare) failure path Zero-initialize the field structure. Reported-by: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <1237710639.7703.46.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97d4daaddd9..594d78aaa18 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -24,26 +24,31 @@ int trace_define_field(struct ftrace_event_call *call, char *type, { struct ftrace_event_field *field; - field = kmalloc(sizeof(*field), GFP_KERNEL); + field = kzalloc(sizeof(*field), GFP_KERNEL); if (!field) goto err; + field->name = kstrdup(name, GFP_KERNEL); if (!field->name) goto err; + field->type = kstrdup(type, GFP_KERNEL); if (!field->type) goto err; + field->offset = offset; field->size = size; list_add(&field->link, &call->fields); return 0; + err: if (field) { kfree(field->name); kfree(field->type); } kfree(field); + return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 9bd7d099ab3f10dd666da399c064999bae427cd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:43 +0100 Subject: tracing/events: make the filter files writable We need the filter files to be writable, the current filter file permissions are only set readable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <1237759847-21025-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 594d78aaa18..19f61dd2321 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -706,7 +706,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->preds = NULL; - entry = debugfs_create_file("filter", 0444, system->entry, system, + entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); if (!entry) pr_warning("Could not create debugfs " @@ -769,7 +769,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } - entry = debugfs_create_file("filter", 0444, call->dir, call, + entry = debugfs_create_file("filter", 0644, call->dir, call, &ftrace_event_filter_fops); if (!entry) pr_warning("Could not create debugfs " -- cgit v1.2.3-70-g09d2 From c4cff064be678f1e8344d907499f2a81282edc19 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 23 Mar 2009 03:26:48 -0500 Subject: tracing/filters: clean up filter_add_subsystem_pred() Impact: cleanup, memory leak fix This patch cleans up filter_add_subsystem_pred(): - searches for the field before creating a copy of the pred - fixes memory leak in the case a predicate isn't applied - if -ENOMEM, makes sure there's no longer a reference to the pred so the caller can free the half-finished filter - changes the confusing i == MAX_FILTER_PRED - 1 comparison previously remarked upon This affects only per-subsystem event filtering. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237796808.7527.40.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 1 + kernel/trace/trace_events_filter.c | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 19f61dd2321..fdab30d6c83 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -585,6 +585,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } if (filter_add_subsystem_pred(system, pred)) { + filter_free_subsystem_preds(system); filter_free_pred(pred); return -EINVAL; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index fd01d8022ad..4117c2ebc24 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -318,22 +318,39 @@ int filter_add_subsystem_pred(struct event_subsystem *system, system->preds[i] = pred; break; } - if (i == MAX_FILTER_PRED - 1) - return -EINVAL; } + if (i == MAX_FILTER_PRED) + return -EINVAL; + events_for_each(call) { + int err; + if (!call->name || !call->regfunc) continue; - if (!strcmp(call->system, system->name)) { - event_pred = copy_pred(pred); - if (event_pred) - filter_add_pred(call, event_pred); - } + if (strcmp(call->system, system->name)) + continue; + + if (!find_event_field(call, pred->field_name)) + continue; + + event_pred = copy_pred(pred); + if (!event_pred) + goto oom; + + err = filter_add_pred(call, event_pred); + if (err) + filter_free_pred(event_pred); + if (err == -ENOMEM) + goto oom; } return 0; + +oom: + system->preds[i] = NULL; + return -ENOMEM; } int filter_parse(char **pbuf, struct filter_pred *pred) -- cgit v1.2.3-70-g09d2 From 09f1f245c79585383de63e3ca54d0f91824bff3a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 24 Mar 2009 02:14:11 -0500 Subject: tracing/filters: free pred when clearing filters Impact: fix (small) per trace filter modification memory leak Free the current pred when clearing the filters via the filter files. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237878851.8339.58.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fdab30d6c83..a9381384aa9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -516,6 +516,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (pred->clear) { filter_free_preds(call); + filter_free_pred(pred); return cnt; } @@ -581,6 +582,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (pred->clear) { filter_free_subsystem_preds(system); + filter_free_pred(pred); return cnt; } -- cgit v1.2.3-70-g09d2 From 4bda2d517bfa3ce3d7044e06988cdddae7adffe2 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 24 Mar 2009 02:14:31 -0500 Subject: tracing/filters: use trace_seq_printf() to print filters Impact: cleanup Instead of just using the trace_seq buffer to print the filters, use trace_seq_printf() as it was intended to be used. Reported-by: Steven Rostedt Signed-off-by: Tom Zanussi Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237878871.8339.59.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 3 ++- kernel/trace/trace_events.c | 8 ++++---- kernel/trace/trace_events_filter.c | 25 +++++++++---------------- 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54fd9bcd0a6..90a848debcb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -840,7 +840,8 @@ struct filter_pred { int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); extern void filter_free_pred(struct filter_pred *pred); -extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern void filter_print_preds(struct filter_pred **preds, + struct trace_seq *s); extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a9381384aa9..d132997ab75 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -481,8 +481,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - r = filter_print_preds(call->preds, s->buffer); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + filter_print_preds(call->preds, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -547,8 +547,8 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - r = filter_print_preds(system->preds, s->buffer); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + filter_print_preds(system->preds, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 3f0b79f8a4b..9fca8bb1c06 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -24,6 +24,7 @@ #include #include "trace.h" +#include "trace_output.h" static int filter_pred_64(struct filter_pred *pred, void *event) { @@ -108,16 +109,15 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) return 1; } -int filter_print_preds(struct filter_pred **preds, char *buf) +void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) { - ssize_t this_len = 0; char *field_name; struct filter_pred *pred; int i; if (!preds) { - this_len += sprintf(buf + this_len, "none\n"); - return this_len; + trace_seq_printf(s, "none\n"); + return; } for (i = 0; i < MAX_FILTER_PRED; i++) { @@ -125,23 +125,16 @@ int filter_print_preds(struct filter_pred **preds, char *buf) pred = preds[i]; field_name = pred->field_name; if (i) - this_len += sprintf(buf + this_len, - pred->or ? "|| " : "&& "); - this_len += sprintf(buf + this_len, - "%s ", field_name); - this_len += sprintf(buf + this_len, - pred->not ? "!= " : "== "); + trace_seq_printf(s, pred->or ? "|| " : "&& "); + trace_seq_printf(s, "%s ", field_name); + trace_seq_printf(s, pred->not ? "!= " : "== "); if (pred->str_val) - this_len += sprintf(buf + this_len, - "%s\n", pred->str_val); + trace_seq_printf(s, "%s\n", pred->str_val); else - this_len += sprintf(buf + this_len, - "%llu\n", pred->val); + trace_seq_printf(s, "%llu\n", pred->val); } else break; } - - return this_len; } static struct ftrace_event_field * -- cgit v1.2.3-70-g09d2 From 9a8118baaeb0eaa148913bed77bf9c6335f6ca63 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 26 Mar 2009 01:24:34 -0500 Subject: tracing: filter fix for TRACE_EVENT_FORMAT events Impact: fix crash (hang) when using TRACE_EVENT_FORMAT filter files filters are only hooked up to the tracepoint events defined using TRACE_EVENT but not the tracers that use TRACE_EVENT_FORMAT, such as ftrace. Do not display the filter files at all for TRACE_EVENT_FORMAT events for the time being. Cc: Steven Rostedt Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237878882.8339.61.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d132997ab75..64ec4d278ff 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -680,7 +680,6 @@ static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; - struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -709,12 +708,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->preds = NULL; - entry = debugfs_create_file("filter", 0644, system->entry, system, - &ftrace_subsystem_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", name); - return system->entry; } @@ -770,14 +763,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) " events/%s\n", call->name); return ret; } + entry = debugfs_create_file("filter", 0644, call->dir, call, + &ftrace_event_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", call->name); } - entry = debugfs_create_file("filter", 0644, call->dir, call, - &ftrace_event_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", call->name); - /* A trace may not want to export its format */ if (!call->show_format) return 0; -- cgit v1.2.3-70-g09d2 From 8433a40eb7f2c4883ad57f9900f63e4d59240eb7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:52:18 +0800 Subject: tracing/filters: NIL-terminate user input filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure messages from user space are NIL-terminated strings, otherwise we could dump random memory while reading filter file. Try this: # echo 'parent_comm ==' > events/sched/sched_process_fork/filter # cat events/sched/sched_process_fork/filter parent_comm == � Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04C32.6060508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d278ff..054bc1802bc 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) @@ -569,6 +570,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) -- cgit v1.2.3-70-g09d2 From 44e9c8b7adc52079f0535f9de0c2c2477831389b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 11 Apr 2009 15:55:28 +0800 Subject: tracing/filters: return proper error code when writing filter file - propagate return value of filter_add_pred() to the user - return -ENOSPC but not -ENOMEM or -EINVAL when the filter array is full Signed-off-by: Li Zefan Acked-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E04CF0.3010105@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_events.c | 10 ++++++---- kernel/trace/trace_events_filter.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 054bc1802bc..576f4fa2af0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -521,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_pred(call, pred)) { + err = filter_add_pred(call, pred); + if (err < 0) { filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; @@ -588,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_subsystem_pred(system, pred)) { + err = filter_add_subsystem_pred(system, pred); + if (err < 0) { filter_free_subsystem_preds(system); filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 49b3ef54ec4..e03cbf1e38f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call, } } - return -ENOMEM; + return -ENOSPC; } static int is_string_field(const char *type) @@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, } if (i == MAX_FILTER_PRED) - return -EINVAL; + return -ENOSPC; events_for_each(call) { int err; -- cgit v1.2.3-70-g09d2