From c47956d9ae3341d2d1998bff26620fa3338c01e4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:11 -0500 Subject: ftrace: remove obsolete print continue functionality Impact: cleanup, remove obsolete code Now that the ring buffer used by ftrace allows for variable length entries, we do not need the 'cont' feature of the buffer. This code makes other parts of ftrace more complex and by removing this it simplifies the ftrace code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f86403..3a357382cce 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -16,7 +16,6 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, - TRACE_CONT, TRACE_STACK, TRACE_PRINT, TRACE_SPECIAL, @@ -178,7 +177,6 @@ struct trace_power { * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler - * CONT - multiple entries hold the trace item */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -186,7 +184,6 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_CONT = 0x20, }; #define TRACE_BUF_SIZE 1024 @@ -262,7 +259,6 @@ extern void __ftrace_bad_type(void); do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ - IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ @@ -489,9 +485,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); -extern void trace_seq_print_cont(struct 
trace_seq *s, - struct trace_iterator *iter); - extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -- cgit v1.2.3-70-g09d2 From f0868d1e23a8efec33beb3aa688aab7fdb1ae093 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:12 -0500 Subject: ftrace: set up trace event hash infrastructure Impact: simplify/generalize/refactor trace.c The trace.c file is becoming more difficult to maintain due to the growing number of events. There is several formats that an event may be printed. This patch sets up the infrastructure of an event hash to allow for events to register how they should be printed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/trace.c | 275 +------------------------- kernel/trace/trace.h | 8 +- kernel/trace/trace_boot.c | 1 + kernel/trace/trace_functions_graph.c | 1 + kernel/trace/trace_hw_branches.c | 1 + kernel/trace/trace_mmiotrace.c | 1 + kernel/trace/trace_output.c | 365 +++++++++++++++++++++++++++++++++++ kernel/trace/trace_output.h | 43 +++++ kernel/trace/trace_power.c | 1 + 10 files changed, 416 insertions(+), 281 deletions(-) create mode 100644 kernel/trace/trace_output.c create mode 100644 kernel/trace/trace_output.h (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 349d5a93653..549f93c9b39 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fca0233f1d7..90ce0c1d437 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -38,6 +38,7 @@ #include #include "trace.h" 
+#include "trace_output.h" #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) @@ -330,132 +331,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(current); } -/** - * trace_seq_printf - sequence printing of trace information - * @s: trace sequence descriptor - * @fmt: printf format string - * - * The tracer may use either sequence operations or its own - * copy to user routines. To simplify formating of a trace - * trace_seq_printf is used to store strings into a special - * buffer (@s). Then the output may be either used by - * the sequencer or pulled into another buffer. - */ -int -trace_seq_printf(struct trace_seq *s, const char *fmt, ...) -{ - int len = (PAGE_SIZE - 1) - s->len; - va_list ap; - int ret; - - if (!len) - return 0; - - va_start(ap, fmt); - ret = vsnprintf(s->buffer + s->len, len, fmt, ap); - va_end(ap); - - /* If we can't write it all, don't bother writing anything */ - if (ret >= len) - return 0; - - s->len += ret; - - return len; -} - -/** - * trace_seq_puts - trace sequence printing of simple string - * @s: trace sequence descriptor - * @str: simple string to record - * - * The tracer may use either the sequence operations or its own - * copy to user routines. This function records a simple string - * into a special buffer (@s) for later retrieval by a sequencer - * or other mechanism. 
- */ -static int -trace_seq_puts(struct trace_seq *s, const char *str) -{ - int len = strlen(str); - - if (len > ((PAGE_SIZE - 1) - s->len)) - return 0; - - memcpy(s->buffer + s->len, str, len); - s->len += len; - - return len; -} - -static int -trace_seq_putc(struct trace_seq *s, unsigned char c) -{ - if (s->len >= (PAGE_SIZE - 1)) - return 0; - - s->buffer[s->len++] = c; - - return 1; -} - -static int -trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) -{ - if (len > ((PAGE_SIZE - 1) - s->len)) - return 0; - - memcpy(s->buffer + s->len, mem, len); - s->len += len; - - return len; -} - -#define MAX_MEMHEX_BYTES 8 -#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) - -static int -trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) -{ - unsigned char hex[HEX_CHARS]; - unsigned char *data = mem; - int i, j; - -#ifdef __BIG_ENDIAN - for (i = 0, j = 0; i < len; i++) { -#else - for (i = len-1, j = 0; i >= 0; i--) { -#endif - hex[j++] = hex_asc_hi(data[i]); - hex[j++] = hex_asc_lo(data[i]); - } - hex[j++] = ' '; - - return trace_seq_putmem(s, hex, j); -} - -static int -trace_seq_path(struct trace_seq *s, struct path *path) -{ - unsigned char *p; - - if (s->len >= (PAGE_SIZE - 1)) - return 0; - p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); - if (!IS_ERR(p)) { - p = mangle_path(s->buffer + s->len, p, "\n"); - if (p) { - s->len = p - s->buffer; - return 1; - } - } else { - s->buffer[s->len++] = '?'; - return 1; - } - - return 0; -} - static void trace_seq_reset(struct trace_seq *s) { @@ -1473,154 +1348,6 @@ static void s_stop(struct seq_file *m, void *p) mutex_unlock(&trace_types_lock); } -#ifdef CONFIG_KRETPROBES -static inline const char *kretprobed(const char *name) -{ - static const char tramp_name[] = "kretprobe_trampoline"; - int size = sizeof(tramp_name); - - if (strncmp(tramp_name, name, size) == 0) - return "[unknown/kretprobe'd]"; - return name; -} -#else -static inline const char *kretprobed(const char *name) -{ - return name; -} 
-#endif /* CONFIG_KRETPROBES */ - -static int -seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) -{ -#ifdef CONFIG_KALLSYMS - char str[KSYM_SYMBOL_LEN]; - const char *name; - - kallsyms_lookup(address, NULL, NULL, NULL, str); - - name = kretprobed(str); - - return trace_seq_printf(s, fmt, name); -#endif - return 1; -} - -static int -seq_print_sym_offset(struct trace_seq *s, const char *fmt, - unsigned long address) -{ -#ifdef CONFIG_KALLSYMS - char str[KSYM_SYMBOL_LEN]; - const char *name; - - sprint_symbol(str, address); - name = kretprobed(str); - - return trace_seq_printf(s, fmt, name); -#endif - return 1; -} - -#ifndef CONFIG_64BIT -# define IP_FMT "%08lx" -#else -# define IP_FMT "%016lx" -#endif - -int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) -{ - int ret; - - if (!ip) - return trace_seq_printf(s, "0"); - - if (sym_flags & TRACE_ITER_SYM_OFFSET) - ret = seq_print_sym_offset(s, "%s", ip); - else - ret = seq_print_sym_short(s, "%s", ip); - - if (!ret) - return 0; - - if (sym_flags & TRACE_ITER_SYM_ADDR) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; -} - -static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, - unsigned long ip, unsigned long sym_flags) -{ - struct file *file = NULL; - unsigned long vmstart = 0; - int ret = 1; - - if (mm) { - const struct vm_area_struct *vma; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ip); - if (vma) { - file = vma->vm_file; - vmstart = vma->vm_start; - } - if (file) { - ret = trace_seq_path(s, &file->f_path); - if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); - } - up_read(&mm->mmap_sem); - } - if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) - ret = trace_seq_printf(s, " <" IP_FMT ">", ip); - return ret; -} - -static int -seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, - unsigned long sym_flags) -{ - struct mm_struct *mm = NULL; - int ret = 1; - unsigned 
int i; - - if (trace_flags & TRACE_ITER_SYM_USEROBJ) { - struct task_struct *task; - /* - * we do the lookup on the thread group leader, - * since individual threads might have already quit! - */ - rcu_read_lock(); - task = find_task_by_vpid(entry->ent.tgid); - if (task) - mm = get_task_mm(task); - rcu_read_unlock(); - } - - for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { - unsigned long ip = entry->caller[i]; - - if (ip == ULONG_MAX || !ret) - break; - if (i && ret) - ret = trace_seq_puts(s, " <- "); - if (!ip) { - if (ret) - ret = trace_seq_puts(s, "??"); - continue; - } - if (!ret) - break; - if (ret) - ret = seq_print_user_ip(s, mm, ip, sym_flags); - } - - if (mm) - mmput(mm); - return ret; -} - static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3a357382cce..6bd71fa1e1c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -30,7 +30,7 @@ enum trace_type { TRACE_HW_BRANCHES, TRACE_POWER, - __TRACE_LAST_TYPE + __TRACE_LAST_TYPE, }; /* @@ -484,12 +484,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); -extern int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, - unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); extern long ns2usecs(cycle_t nsec); extern int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 3ccebde2848..cb2ff3e297b 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -11,6 +11,7 @@ #include #include "trace.h" +#include "trace_output.h" static struct trace_array *boot_trace; static bool pre_initcalls_finished; diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c index f261966e5b6..f8ac5417afc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -12,6 +12,7 @@ #include #include "trace.h" +#include "trace_output.h" #define TRACE_GRAPH_INDENT 2 diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index b6a3e20a49a..879752b006b 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -14,6 +14,7 @@ #include #include "trace.h" +#include "trace_output.h" #define SIZEOF_BTS (1 << 13) diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 83f20ae6bd6..fcec59ff235 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -11,6 +11,7 @@ #include #include "trace.h" +#include "trace_output.h" struct header_iter { struct pci_dev *dev; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c new file mode 100644 index 00000000000..1f3f80002b5 --- /dev/null +++ b/kernel/trace/trace_output.c @@ -0,0 +1,365 @@ +/* + * trace_output.c + * + * Copyright (C) 2008 Red Hat Inc, Steven Rostedt + * + */ + +#include +#include +#include + +#include "trace_output.h" + +/* must be a power of 2 */ +#define EVENT_HASHSIZE 128 + +static DEFINE_MUTEX(trace_event_mutex); +static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; + +static int next_event_type = __TRACE_LAST_TYPE + 1; + +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
+{ + int len = (PAGE_SIZE - 1) - s->len; + va_list ap; + int ret; + + if (!len) + return 0; + + va_start(ap, fmt); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) + return 0; + + s->len += ret; + + return len; +} + +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ +int trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len = strlen(str); + + if (len > ((PAGE_SIZE - 1) - s->len)) + return 0; + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + if (s->len >= (PAGE_SIZE - 1)) + return 0; + + s->buffer[s->len++] = c; + + return 1; +} + +int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) +{ + if (len > ((PAGE_SIZE - 1) - s->len)) + return 0; + + memcpy(s->buffer + s->len, mem, len); + s->len += len; + + return len; +} + +int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) +{ + unsigned char hex[HEX_CHARS]; + unsigned char *data = mem; + int i, j; + +#ifdef __BIG_ENDIAN + for (i = 0, j = 0; i < len; i++) { +#else + for (i = len-1, j = 0; i >= 0; i--) { +#endif + hex[j++] = hex_asc_hi(data[i]); + hex[j++] = hex_asc_lo(data[i]); + } + hex[j++] = ' '; + + return trace_seq_putmem(s, hex, j); +} + +int trace_seq_path(struct trace_seq *s, struct path *path) +{ + unsigned char *p; + + if (s->len >= (PAGE_SIZE - 1)) + return 0; + p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); + if (!IS_ERR(p)) { + p = mangle_path(s->buffer + s->len, p, "\n"); + if (p) { + s->len = p - s->buffer; + return 1; + } + } else { + s->buffer[s->len++] = 
'?'; + return 1; + } + + return 0; +} + +#ifdef CONFIG_KRETPROBES +static inline const char *kretprobed(const char *name) +{ + static const char tramp_name[] = "kretprobe_trampoline"; + int size = sizeof(tramp_name); + + if (strncmp(tramp_name, name, size) == 0) + return "[unknown/kretprobe'd]"; + return name; +} +#else +static inline const char *kretprobed(const char *name) +{ + return name; +} +#endif /* CONFIG_KRETPROBES */ + +static int +seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + const char *name; + + kallsyms_lookup(address, NULL, NULL, NULL, str); + + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); +#endif + return 1; +} + +static int +seq_print_sym_offset(struct trace_seq *s, const char *fmt, + unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + char str[KSYM_SYMBOL_LEN]; + const char *name; + + sprint_symbol(str, address); + name = kretprobed(str); + + return trace_seq_printf(s, fmt, name); +#endif + return 1; +} + +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + +int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags) +{ + struct file *file = NULL; + unsigned long vmstart = 0; + int ret = 1; + + if (mm) { + const struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); + if (vma) { + file = vma->vm_file; + vmstart = vma->vm_start; + } + if (file) { + ret = trace_seq_path(s, &file->f_path); + if (ret) + ret = trace_seq_printf(s, "[+0x%lx]", + ip - vmstart); + } + up_read(&mm->mmap_sem); + } + if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + +int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + struct mm_struct *mm = NULL; + int ret = 1; + unsigned int i; + + if (trace_flags & 
TRACE_ITER_SYM_USEROBJ) { + struct task_struct *task; + /* + * we do the lookup on the thread group leader, + * since individual threads might have already quit! + */ + rcu_read_lock(); + task = find_task_by_vpid(entry->ent.tgid); + if (task) + mm = get_task_mm(task); + rcu_read_unlock(); + } + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i && ret) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + if (ret) + ret = trace_seq_puts(s, "??"); + continue; + } + if (!ret) + break; + if (ret) + ret = seq_print_user_ip(s, mm, ip, sym_flags); + } + + if (mm) + mmput(mm); + return ret; +} + +int +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) +{ + int ret; + + if (!ip) + return trace_seq_printf(s, "0"); + + if (sym_flags & TRACE_ITER_SYM_OFFSET) + ret = seq_print_sym_offset(s, "%s", ip); + else + ret = seq_print_sym_short(s, "%s", ip); + + if (!ret) + return 0; + + if (sym_flags & TRACE_ITER_SYM_ADDR) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + return ret; +} + +/** + * ftrace_find_event - find a registered event + * @type: the type of event to look for + * + * Returns an event of type @type otherwise NULL + */ +struct trace_event *ftrace_find_event(int type) +{ + struct trace_event *event; + struct hlist_node *n; + unsigned key; + + key = type & (EVENT_HASHSIZE - 1); + + hlist_for_each_entry_rcu(event, n, &event_hash[key], node) { + if (event->type == type) + return event; + } + + return NULL; +} + +/** + * register_ftrace_event - register output for an event type + * @event: the event type to register + * + * Event types are stored in a hash and this hash is used to + * find a way to print an event. If the @event->type is set + * then it will use that type, otherwise it will assign a + * type to use. 
+ * + * If you assign your own type, please make sure it is added + * to the trace_type enum in trace.h, to avoid collisions + * with the dynamic types. + * + * Returns the event type number or zero on error. + */ +int register_ftrace_event(struct trace_event *event) +{ + unsigned key; + int ret = 0; + + mutex_lock(&trace_event_mutex); + + if (!event->type) + event->type = next_event_type++; + else if (event->type > __TRACE_LAST_TYPE) { + printk(KERN_WARNING "Need to add type to trace.h\n"); + WARN_ON(1); + } + + if (ftrace_find_event(event->type)) + goto out; + + key = event->type & (EVENT_HASHSIZE - 1); + + hlist_add_head_rcu(&event->node, &event_hash[key]); + + ret = event->type; + out: + mutex_unlock(&trace_event_mutex); + + return ret; +} + +/** + * unregister_ftrace_event - remove a no longer used event + * @event: the event to remove + */ +int unregister_ftrace_event(struct trace_event *event) +{ + mutex_lock(&trace_event_mutex); + hlist_del(&event->node); + mutex_unlock(&trace_event_mutex); + + return 0; +} diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h new file mode 100644 index 00000000000..1fcc76e1378 --- /dev/null +++ b/kernel/trace/trace_output.h @@ -0,0 +1,43 @@ +#ifndef __TRACE_EVENTS_H +#define __TRACE_EVENTS_H + +#include "trace.h" + +typedef int (*trace_print_func)(struct trace_seq *s, struct trace_entry *entry, + int flags); + +struct trace_event { + struct hlist_node node; + int type; + trace_print_func trace; + trace_print_func latency_trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +extern int +seq_print_ip_sym(struct trace_seq *s, unsigned long ip, + unsigned long sym_flags); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +int trace_seq_puts(struct trace_seq *s, const char *str); +int trace_seq_putc(struct trace_seq *s, unsigned char c); +int trace_seq_putmem(struct 
trace_seq *s, void *mem, size_t len); +int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); +int trace_seq_path(struct trace_seq *s, struct path *path); +int seq_print_userip_objs(const struct userstack_entry *entry, + struct trace_seq *s, unsigned long sym_flags); +int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, + unsigned long ip, unsigned long sym_flags); + +struct trace_event *ftrace_find_event(int type); +int register_ftrace_event(struct trace_event *event); +int unregister_ftrace_event(struct trace_event *event); + +#define MAX_MEMHEX_BYTES 8 +#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) + +#endif + diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index a7172a352f6..b9b13c39b4b 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -16,6 +16,7 @@ #include #include "trace.h" +#include "trace_output.h" static struct trace_array *power_trace; static int __read_mostly trace_power_enabled; -- cgit v1.2.3-70-g09d2 From dbd0b4b33074aa6b7832a9d9a5bd985eca5c1aa2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 28 Dec 2008 20:44:51 -0800 Subject: tracing/ftrace: provide the base infrastructure for histogram tracing Impact: extend the tracing API The goal of this patch is to normalize and ease the implementation of statistical (histogram) tracing. It implements a trace_stat file in the /debugfs/tracing directory where one can print a one-shot output of statistics/histogram entries. A tracer has to provide two basic iterator callbacks: stat_start() => the first entry stat_next(prev, idx) => the next one. Note that this is suitable for arrays, hash tables, lists, etc., since it provides a pointer to the previous entry and the current index of the iterator. These two callbacks are called to get a snapshot of the statistics at each opening of the trace_stat file, so the values are updated between two "cat trace_stat" invocations.
And the tracer is free to lock its data during the iteration to keep consistent values. Since it is almost always interesting to sort statistical values to address the problems by priority, this infrastructure provides a "sorting" of the stat entries too if desired. A tracer has just to provide a stat_cmp callback to compare two entries and the stat tracing infrastructure will build a sorted list of the given entries. A last callback, called stat_headers, can be implemented by a tracer to output headers on its trace. If one of these callbacks is changed at runtime, the tracer just has to signal it to the stat tracing API by calling the init_tracer_stat() helper. Changes in V2: - Fix a memory leak if the user opens the trace_stat file multiple times without closing it. Now we always free our list before rebuilding it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/trace.c | 3 +- kernel/trace/trace.h | 17 ++++ kernel/trace/trace_stat.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 kernel/trace/trace_stat.c (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 549f93c9b39..31cd5fbc0ee 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_output.o +obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3f0317586cf..b789c010512 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2354,6 +2354,7 @@ static int tracing_set_tracer(char *buf) if (ret) goto out; } + init_tracer_stat(t); trace_branch_enable(tr); out: @@ -3206,7 +3207,7 @@ __init static int
tracer_alloc_buffers(void) #else current_trace = &nop_trace; #endif - + init_tracer_stat(current_trace); /* All seems OK, enable tracing */ tracing_disabled = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6bd71fa1e1c..05fa804d1c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -336,6 +336,21 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; + + /* + * If you change one of the following on tracing runtime, recall + * init_tracer_stat() + */ + + /* Iteration over statistic entries */ + void *(*stat_start)(void); + void *(*stat_next)(void *prev, int idx); + /* Compare two entries for sorting (optional) for stats */ + int (*stat_cmp)(void *p1, void *p2); + /* Print a stat entry */ + int (*stat_show)(struct seq_file *s, void *p); + /* Print the headers of your stat entries */ + int (*stat_headers)(struct seq_file *s); }; struct trace_seq { @@ -421,6 +436,8 @@ void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); +void init_tracer_stat(struct tracer *trace); + extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c new file mode 100644 index 00000000000..6f194a33a64 --- /dev/null +++ b/kernel/trace/trace_stat.c @@ -0,0 +1,251 @@ +/* + * Infrastructure for statistic tracing (histogram output). + * + * Copyright (C) 2008 Frederic Weisbecker + * + * Based on the code from trace_branch.c which is + * Copyright (C) 2008 Steven Rostedt + * + */ + + +#include +#include +#include +#include "trace.h" + + +/* List of stat entries from a tracer */ +struct trace_stat_list { + struct list_head list; + void *stat; +}; + +static struct trace_stat_list stat_list; + +/* + * This is a copy of the current tracer to avoid racy + * and dangerous output while the current tracer is + * switched. 
+ */ +static struct tracer current_tracer; + +/* + * Protect both the current tracer and the global + * stat list. + */ +static DEFINE_MUTEX(stat_list_mutex); + + +static void reset_stat_list(void) +{ + struct trace_stat_list *node; + struct list_head *next; + + if (list_empty(&stat_list.list)) + return; + + node = list_entry(stat_list.list.next, struct trace_stat_list, list); + next = node->list.next; + + while (&node->list != next) { + kfree(node); + node = list_entry(next, struct trace_stat_list, list); + } + kfree(node); + + INIT_LIST_HEAD(&stat_list.list); +} + +void init_tracer_stat(struct tracer *trace) +{ + mutex_lock(&stat_list_mutex); + current_tracer = *trace; + mutex_unlock(&stat_list_mutex); +} + +/* + * For tracers that don't provide a stat_cmp callback. + * This one will force an immediate insertion on tail of + * the list. + */ +static int dummy_cmp(void *p1, void *p2) +{ + return 1; +} + +/* + * Initialize the stat list at each trace_stat file opening. + * All of these copies and sorting are required on all opening + * since the stats could have changed between two file sessions. + */ +static int stat_seq_init(void) +{ + struct trace_stat_list *iter_entry, *new_entry; + void *prev_stat; + int ret = 0; + int i; + + mutex_lock(&stat_list_mutex); + reset_stat_list(); + + if (!current_tracer.stat_start || !current_tracer.stat_next || + !current_tracer.stat_show) + goto exit; + + if (!current_tracer.stat_cmp) + current_tracer.stat_cmp = dummy_cmp; + + /* + * The first entry. Actually this is the second, but the first + * one (the stat_list head) is pointless. + */ + new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); + if (!new_entry) { + ret = -ENOMEM; + goto exit; + } + + INIT_LIST_HEAD(&new_entry->list); + list_add(&new_entry->list, &stat_list.list); + new_entry->stat = current_tracer.stat_start(); + + prev_stat = new_entry->stat; + + /* + * Iterate over the tracer stat entries and store them in a sorted + * list. 
+ */ + for (i = 1; ; i++) { + new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); + if (!new_entry) { + ret = -ENOMEM; + goto exit_free_list; + } + + INIT_LIST_HEAD(&new_entry->list); + new_entry->stat = current_tracer.stat_next(prev_stat, i); + + /* End of insertion */ + if (!new_entry->stat) + break; + + list_for_each_entry(iter_entry, &stat_list.list, list) { + /* Insertion with a descendent sorting */ + if (current_tracer.stat_cmp(new_entry->stat, + iter_entry->stat) > 0) { + + list_add_tail(&new_entry->list, + &iter_entry->list); + break; + + /* The current smaller value */ + } else if (list_is_last(&iter_entry->list, + &stat_list.list)) { + list_add(&new_entry->list, &iter_entry->list); + break; + } + } + + prev_stat = new_entry->stat; + } +exit: + mutex_unlock(&stat_list_mutex); + return ret; + +exit_free_list: + reset_stat_list(); + mutex_unlock(&stat_list_mutex); + return ret; +} + + +static void *stat_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_stat_list *l = (struct trace_stat_list *)s->private; + + /* Prevent from tracer switch or stat_list modification */ + mutex_lock(&stat_list_mutex); + + /* If we are in the beginning of the file, print the headers */ + if (!*pos && current_tracer.stat_headers) + current_tracer.stat_headers(s); + + return seq_list_start(&l->list, *pos); +} + +static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) +{ + struct trace_stat_list *l = (struct trace_stat_list *)s->private; + + return seq_list_next(p, &l->list, pos); +} + +static void stat_seq_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&stat_list_mutex); +} + +static int stat_seq_show(struct seq_file *s, void *v) +{ + struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); + return current_tracer.stat_show(s, l->stat); +} + +static const struct seq_operations trace_stat_seq_ops = { + .start = stat_seq_start, + .next = stat_seq_next, + .stop = stat_seq_stop, + .show = stat_seq_show +}; + +static int 
tracing_stat_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &trace_stat_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = &stat_list; + ret = stat_seq_init(); + } + + return ret; +} + + +/* + * Avoid consuming memory with our now useless list. + */ +static int tracing_stat_release(struct inode *i, struct file *f) +{ + mutex_lock(&stat_list_mutex); + reset_stat_list(); + mutex_unlock(&stat_list_mutex); + return 0; +} + +static const struct file_operations tracing_stat_fops = { + .open = tracing_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_stat_release +}; + +static int __init tracing_stat_init(void) +{ + struct dentry *d_tracing; + struct dentry *entry; + + INIT_LIST_HEAD(&stat_list.list); + d_tracing = tracing_init_dentry(); + + entry = debugfs_create_file("trace_stat", 0444, d_tracing, + NULL, + &tracing_stat_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'trace_stat' entry\n"); + return 0; +} +fs_initcall(tracing_stat_init); -- cgit v1.2.3-70-g09d2 From f7d48cbde5c0710008caeaf7dbf14f4a9b064940 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 29 Dec 2008 13:02:17 +0100 Subject: tracing/ftrace: make trace_find_cmdline() generally available Impact: build fix On !CONFIG_CONTEXT_SWITCH_TRACER trace_find_cmdline() is not defined: kernel/trace/trace_output.c: In function 'trace_ctxwake_print': kernel/trace/trace_output.c:499: error: implicit declaration of function 'trace_find_cmdline' kernel/trace/trace_output.c:499: warning: assignment makes pointer from integer without a cast Move it to the generic section in trace.h. 
Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 05fa804d1c1..a8b624ccd4d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -469,10 +469,10 @@ struct tracer_switch_ops { void *private; struct tracer_switch_ops *next; }; - -char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ +extern char *trace_find_cmdline(int pid); + #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func -- cgit v1.2.3-70-g09d2 From 36994e58a48fb8f9651c7dc845a6de298aba5bfc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Dec 2008 13:42:23 -0800 Subject: tracing/kmemtrace: normalize the raw tracer event to the unified tracing API Impact: new tracer plugin This patch adapts kmemtrace raw events tracing to the unified tracing API. To enable and use this tracer, just do the following: echo kmemtrace > /debugfs/tracing/current_tracer cat /debugfs/tracing/trace You will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | type_id 1 call_site 18446744071565527833 ptr 18446612134395152256 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164672 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 
18446612134345164912 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345165152 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 0 call_site 18446744071566144042 ptr 18446612134346191680 bytes_req 1304 bytes_alloc 1312 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 That was to stay backward compatible with the format output produced in linux/tracepoint.h. This is the default output, but note that I tried something else. If you change an option: echo kmem_minimalistic > /debugfs/trace_options and then cat /debugfs/trace, you will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | - C 0xffff88007c088780 file_free_rcu + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc780 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc870 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc960 -1 d_alloc + K 1304 1312 000000d0 0xffff8800791d7340 -1 reiserfs_alloc_inode - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 992 1000 000000d0 0xffff880079045b58 -1 alloc_inode + K 768 1024 000080d0 0xffff88007c096400 -1 alloc_pipe_info + K 240 240 000000d0 0xffff8800790dca50 -1 d_alloc + K 272 320
000080d0 0xffff88007c088780 -1 get_empty_filp + K 272 320 000080d0 0xffff88007c088000 -1 get_empty_filp Yeah I shall confess kmem_minimalistic should be: kmem_alternative. Whatever, I find it more readable but this a personal opinion of course. We can drop it if you want. On the ALLOC/FREE column, + means an allocation and - a free. On the type column, you have K = kmalloc, C = cache, P = page I would like the flags to be GFP_* strings but that would not be easy to not break the column with strings.... About the node...it seems to always be -1. I don't know why but that shouldn't be difficult to find. I moved linux/tracepoint.h to trace/tracepoint.h as well. I think that would be more easy to find the tracer headers if they are all in their common directory. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/kmemtrace.h | 86 ------------ include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/trace/kmemtrace.h | 75 ++++++++++ init/main.c | 2 +- kernel/trace/Kconfig | 22 +++ kernel/trace/Makefile | 1 + kernel/trace/kmemtrace.c | 343 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 25 ++++ lib/Kconfig.debug | 20 --- mm/kmemtrace.c | 2 +- mm/slob.c | 2 +- mm/slub.c | 2 +- 13 files changed, 472 insertions(+), 112 deletions(-) delete mode 100644 include/linux/kmemtrace.h create mode 100644 include/trace/kmemtrace.h create mode 100644 kernel/trace/kmemtrace.c (limited to 'kernel/trace/trace.h') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h deleted file mode 100644 index 5bea8ead6a6..00000000000 --- a/include/linux/kmemtrace.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). 
*/ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; - -#ifdef CONFIG_KMEMTRACE - -extern void kmemtrace_init(void); - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu " - "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d", - type_id, call_site, (unsigned long) ptr, - (unsigned long) bytes_req, (unsigned long) bytes_alloc, - (unsigned long) gfp_flags, node); -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ - trace_mark(kmemtrace_free, "type_id %d call_site %lu ptr %lu", - type_id, call_site, (unsigned long) ptr); -} - -#else /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_init(void) -{ -} - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ -} - -#endif /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_mark_alloc_node(type_id, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 7555ce99f6d..455f9affea9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size 
description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index dc28432b5b9..6b657f7dcb2 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h new file mode 100644 index 00000000000..ad8b7857855 --- /dev/null +++ b/include/trace/kmemtrace.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include +#include + +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +#ifdef CONFIG_KMEMTRACE + +extern void kmemtrace_init(void); + +extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node); + +extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr); + +#else /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_init(void) +{ +} + +static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ +} + +static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ +} + +#endif /* CONFIG_KMEMTRACE */ + +static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_mark_alloc_node(type_id, call_site, 
ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/init/main.c b/init/main.c index 9711586aa7c..beca7aaddb2 100644 --- a/init/main.c +++ b/init/main.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a..27fb74b06b3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -264,6 +264,28 @@ config HW_BRANCH_TRACER This tracer records all branches on the system in a circular buffer giving access to the last N branches for each cpu. +config KMEMTRACE + bool "Trace SLAB allocations" + select TRACING + depends on RELAY + help + kmemtrace provides tracing for slab allocator functions, such as + kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected + data is then fed to the userspace application in order to analyse + allocation hotspots, internal fragmentation and so on, making it + possible to see how well an allocator performs, as well as debug + and profile kernel code. + + This requires an userspace application to use. See + Documentation/vm/kmemtrace.txt for more information. + + Saying Y will make the kernel somewhat larger and slower. However, + if you disable kmemtrace at run-time or boot-time, the performance + impact is minimal (depending on the arch the kernel is built for). + + If unsure, say N. 
+ + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 349d5a93653..513dc86b5df 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o obj-$(CONFIG_POWER_TRACER) += trace_power.o +obj-$(CONFIG_KMEMTRACE) += kmemtrace.o libftrace-y := ftrace.o diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c new file mode 100644 index 00000000000..d69cbe3c2a4 --- /dev/null +++ b/kernel/trace/kmemtrace.c @@ -0,0 +1,343 @@ +/* + * Memory allocator tracing + * + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * Copyright (C) 2008 Pekka Enberg + * Copyright (C) 2008 Frederic Weisbecker + */ + +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_output.h" + +/* Select an alternative, minimalistic output than the original one */ +#define TRACE_KMEM_OPT_MINIMAL 0x1 + +static struct tracer_opt kmem_opts[] = { + /* Default disable the minimalistic output */ + { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, + { } +}; + +static struct tracer_flags kmem_tracer_flags = { + .val = 0, + .opts = kmem_opts +}; + + +static bool kmem_tracing_enabled __read_mostly; +static struct trace_array *kmemtrace_array; + +static int kmem_trace_init(struct trace_array *tr) +{ + int cpu; + kmemtrace_array = tr; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + + kmem_tracing_enabled = true; + + return 0; +} + +static void kmem_trace_reset(struct trace_array *tr) +{ + kmem_tracing_enabled = false; +} + +static void kmemtrace_headers(struct seq_file *s) +{ + /* Don't need headers for the original kmemtrace output */ + if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) + return; + + seq_printf(s, "#\n"); + seq_printf(s, 
"# ALLOC TYPE REQ GIVEN FLAGS " + " POINTER NODE CALLER\n"); + seq_printf(s, "# FREE | | | | " + " | | | |\n"); + seq_printf(s, "# |\n\n"); +} + +/* + * The two following functions give the original output from kmemtrace, + * or something close to....perhaps they need some missing things + */ +static enum print_line_t +kmemtrace_print_alloc_original(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " + "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr, + (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, + (unsigned long) entry->gfp_flags, entry->node); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_original(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Taken from the old linux/kmemtrace.h */ + ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", + entry->type_id, entry->call_site, (unsigned long) entry->ptr); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + + +/* The two other following provide a more minimalistic output */ +static enum print_line_t +kmemtrace_print_alloc_compress(struct trace_iterator *iter, + struct kmemtrace_alloc_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Alloc entry */ + ret = trace_seq_printf(s, " + "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? 
"); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Requested */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_req); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Allocated */ + ret = trace_seq_printf(s, "%4d ", entry->bytes_alloc); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Flags + * TODO: would be better to see the name of the GFP flag names + */ + ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Node */ + ret = trace_seq_printf(s, "%4d ", entry->node); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +kmemtrace_print_free_compress(struct trace_iterator *iter, + struct kmemtrace_free_entry *entry) +{ + struct trace_seq *s = &iter->seq; + int ret; + + /* Free entry */ + ret = trace_seq_printf(s, " - "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Type */ + switch (entry->type_id) { + case KMEMTRACE_TYPE_KMALLOC: + ret = trace_seq_printf(s, "K "); + break; + case KMEMTRACE_TYPE_CACHE: + ret = trace_seq_printf(s, "C "); + break; + case KMEMTRACE_TYPE_PAGES: + ret = trace_seq_printf(s, "P "); + break; + default: + ret = trace_seq_printf(s, "? 
"); + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip requested/allocated/flags */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Pointer to allocated */ + ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Skip node */ + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + /* Call site */ + ret = seq_print_ip_sym(s, entry->call_site, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (!trace_seq_printf(s, "\n")) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_KMEM_ALLOC: { + struct kmemtrace_alloc_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_alloc_compress(iter, field); + else + return kmemtrace_print_alloc_original(iter, field); + } + + case TRACE_KMEM_FREE: { + struct kmemtrace_free_entry *field; + trace_assign_type(field, entry); + if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) + return kmemtrace_print_free_compress(iter, field); + else + return kmemtrace_print_free_original(iter, field); + } + + default: + return TRACE_TYPE_UNHANDLED; + } +} + +/* Trace allocations */ +void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + struct ring_buffer_event *event; + struct kmemtrace_alloc_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = 
TRACE_KMEM_ALLOC; + entry->call_site = call_site; + entry->ptr = ptr; + entry->bytes_req = bytes_req; + entry->bytes_alloc = bytes_alloc; + entry->gfp_flags = gfp_flags; + entry->node = node; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +void kmemtrace_mark_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ + struct ring_buffer_event *event; + struct kmemtrace_free_entry *entry; + struct trace_array *tr = kmemtrace_array; + unsigned long irq_flags; + + if (!kmem_tracing_enabled) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_FREE; + entry->type_id = type_id; + entry->call_site = call_site; + entry->ptr = ptr; + + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); +} + +static struct tracer kmem_tracer __read_mostly = { + .name = "kmemtrace", + .init = kmem_trace_init, + .reset = kmem_trace_reset, + .print_line = kmemtrace_print_line, + .print_header = kmemtrace_headers, + .flags = &kmem_tracer_flags +}; + +static int __init init_kmem_tracer(void) +{ + return register_tracer(&kmem_tracer); +} + +device_initcall(init_kmem_tracer); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f86403..534505bb39b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,6 +9,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -29,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_KMEM_ALLOC, + TRACE_KMEM_FREE, TRACE_POWER, __TRACE_LAST_TYPE @@ -170,6 +173,24 @@ struct trace_power { struct power_trace state_data; }; +struct kmemtrace_alloc_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t 
bytes_alloc; + gfp_t gfp_flags; + int node; +}; + +struct kmemtrace_free_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ + TRACE_KMEM_ALLOC); \ + IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ + TRACE_KMEM_FREE); \ __ftrace_bad_type(); \ } while (0) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b5417e23ba9..b0f239e443b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -803,26 +803,6 @@ config FIREWIRE_OHCI_REMOTE_DMA If unsure, say N. -config KMEMTRACE - bool "Kernel memory tracer (kmemtrace)" - depends on RELAY && DEBUG_FS && MARKERS - help - kmemtrace provides tracing for slab allocator functions, such as - kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected - data is then fed to the userspace application in order to analyse - allocation hotspots, internal fragmentation and so on, making it - possible to see how well an allocator performs, as well as debug - and profile kernel code. - - This requires an userspace application to use. See - Documentation/vm/kmemtrace.txt for more information. - - Saying Y will make the kernel somewhat larger and slower. However, - if you disable kmemtrace at run-time or boot-time, the performance - impact is minimal (depending on the arch the kernel is built for). - - If unsure, say N. 
- menuconfig BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK diff --git a/mm/kmemtrace.c b/mm/kmemtrace.c index 2a70a805027..0573b5080cc 100644 --- a/mm/kmemtrace.c +++ b/mm/kmemtrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define KMEMTRACE_SUBBUF_SIZE 524288 #define KMEMTRACE_DEF_N_SUBBUFS 20 diff --git a/mm/slob.c b/mm/slob.c index 0f1a49f4069..4d1c0fc33b6 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include /* diff --git a/mm/slub.c b/mm/slub.c index cc4001fee7a..7bf8cf8ec08 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 034939b65ad5ff64b9709210b3469a95153c51a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Jan 2009 10:03:56 -0800 Subject: tracing/ftrace: handle more than one stat file per tracer Impact: new API for tracers Make the stat tracing API reentrant. And also provide the new directory /debugfs/tracing/trace_stat which will contain all the stat files for the currently active tracer. Now a tracer can, if desired, provide a zero terminated array of tracer_stat structures. Each one contains the callbacks necessary for one stat file. It has to provide at least a name for its stat file, an iterator with stat_start/stat_next callbacks and an output callback for one stat entry. Also adapt the branch tracer to this new API. We create two files "all" and "annotated" inside the /debugfs/tracing/trace_stat directory, making both stats simultaneously available instead of needing to change an option to switch from one stat file to another. The output of these stats hasn't changed. Changes in v2: _ Apply the previous memory leak fix (rebase against tip/master) Changes in v3: _ Merge the patch that adapted the branch tracer to this API into this patch so as not to break the kernel build.
Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 35 ++++--- kernel/trace/trace_branch.c | 69 ++++++------- kernel/trace/trace_stat.c | 230 ++++++++++++++++++++++++++++++++------------ 3 files changed, 217 insertions(+), 117 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 94ed45e93a8..b3f9ad1b4d8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,6 +334,25 @@ struct tracer_flags { /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b +/* + * If you want to provide a stat file (one-shot statistics), fill + * an iterator with stat_start/stat_next and a stat_show callbacks. + * The others callbacks are optional. + */ +struct tracer_stat { + /* The name of your stat file */ + const char *name; + /* Iteration over statistic entries */ + void *(*stat_start)(void); + void *(*stat_next)(void *prev, int idx); + /* Compare two entries for sorting (optional) for stats */ + int (*stat_cmp)(void *p1, void *p2); + /* Print a stat entry */ + int (*stat_show)(struct seq_file *s, void *p); + /* Print the headers of your stat entries */ + int (*stat_headers)(struct seq_file *s); +}; + /* * A specific tracer, represented by methods that operate on a trace array: */ @@ -361,21 +380,7 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; - - /* - * If you change one of the following on tracing runtime, recall - * init_tracer_stat() - */ - - /* Iteration over statistic entries */ - void *(*stat_start)(void); - void *(*stat_next)(void *prev, int idx); - /* Compare two entries for sorting (optional) for stats */ - int (*stat_cmp)(void *p1, void *p2); - /* Print a stat entry */ - int (*stat_show)(struct seq_file *s, void *p); - /* Print the headers of your stat entries */ - int (*stat_headers)(struct seq_file *s); + struct tracer_stat *stats; }; struct trace_seq { diff --git 
a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 4785a3b9bc4..da5cf3e5581 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -306,19 +306,6 @@ static int annotated_branch_stat_cmp(void *p1, void *p2) } #ifdef CONFIG_PROFILE_ALL_BRANCHES -enum { - TRACE_BRANCH_OPT_ALL = 0x1 -}; - -static struct tracer_opt branch_opts[] = { - { TRACER_OPT(stat_all_branch, TRACE_BRANCH_OPT_ALL) }, - { } -}; - -static struct tracer_flags branch_flags = { - .val = 0, - .opts = branch_opts -}; extern unsigned long __start_branch_profile[]; extern unsigned long __stop_branch_profile[]; @@ -352,28 +339,36 @@ all_branch_stat_next(void *v, int idx) return p; } -static int branch_set_flag(u32 old_flags, u32 bit, int set) -{ - if (bit == TRACE_BRANCH_OPT_ALL) { - if (set) { - branch_trace.stat_headers = all_branch_stat_headers; - branch_trace.stat_start = all_branch_stat_start; - branch_trace.stat_next = all_branch_stat_next; - branch_trace.stat_cmp = NULL; - } else { - branch_trace.stat_headers = - annotated_branch_stat_headers; - branch_trace.stat_start = annotated_branch_stat_start; - branch_trace.stat_next = annotated_branch_stat_next; - branch_trace.stat_cmp = annotated_branch_stat_cmp; - } - init_tracer_stat(&branch_trace); - } - return 0; -} +static struct tracer_stat branch_stats[] = { + {.name = "annotated", + .stat_start = annotated_branch_stat_start, + .stat_next = annotated_branch_stat_next, + .stat_cmp = annotated_branch_stat_cmp, + .stat_headers = annotated_branch_stat_headers, + .stat_show = branch_stat_show}, + {.name = "all", + .stat_start = all_branch_stat_start, + .stat_next = all_branch_stat_next, + .stat_headers = all_branch_stat_headers, + .stat_show = branch_stat_show}, + + { } +}; +#else +static struct tracer_stat branch_stats[] = { + {.name = "annotated", + .stat_start = annotated_branch_stat_start, + .stat_next = annotated_branch_stat_next, + .stat_cmp = annotated_branch_stat_cmp, + .stat_headers = 
annotated_branch_stat_headers, + .stat_show = branch_stat_show}, + + { } +}; #endif /* CONFIG_PROFILE_ALL_BRANCHES */ + static struct tracer branch_trace __read_mostly = { .name = "branch", @@ -383,16 +378,8 @@ static struct tracer branch_trace __read_mostly = #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_branch, #endif /* CONFIG_FTRACE_SELFTEST */ -#endif /* CONFIG_BRANCH_TRACER */ - .stat_start = annotated_branch_stat_start, - .stat_next = annotated_branch_stat_next, - .stat_show = branch_stat_show, - .stat_headers = annotated_branch_stat_headers, - .stat_cmp = annotated_branch_stat_cmp, -#ifdef CONFIG_PROFILE_ALL_BRANCHES - .flags = &branch_flags, - .set_flag = branch_set_flag, #endif + .stats = branch_stats }; __init static int init_branch_trace(void) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index f110ce9ce7f..1515f9e7adf 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -21,37 +21,87 @@ struct trace_stat_list { void *stat; }; -static LIST_HEAD(stat_list); - -/* - * This is a copy of the current tracer to avoid racy - * and dangerous output while the current tracer is - * switched. - */ -static struct tracer current_tracer; +/* A stat session is the stats output in one file */ +struct tracer_stat_session { + struct tracer_stat *ts; + struct list_head stat_list; + struct mutex stat_mutex; +}; -/* - * Protect both the current tracer and the global - * stat list. - */ -static DEFINE_MUTEX(stat_list_mutex); +/* All of the sessions currently in use. 
Each stat file embeed one session */ +static struct tracer_stat_session **all_stat_sessions; +static int nb_sessions; +static struct dentry *stat_dir, **stat_files; -static void reset_stat_list(void) +static void reset_stat_session(struct tracer_stat_session *session) { struct trace_stat_list *node, *next; - list_for_each_entry_safe(node, next, &stat_list, list) + list_for_each_entry_safe(node, next, &session->stat_list, list) kfree(node); - INIT_LIST_HEAD(&stat_list); + INIT_LIST_HEAD(&session->stat_list); } -void init_tracer_stat(struct tracer *trace) +/* Called when a tracer is initialized */ +static int init_all_sessions(int nb, struct tracer_stat *ts) { - mutex_lock(&stat_list_mutex); - current_tracer = *trace; - mutex_unlock(&stat_list_mutex); + int i, j; + struct tracer_stat_session *session; + + nb_sessions = 0; + + if (all_stat_sessions) { + for (i = 0; i < nb_sessions; i++) { + session = all_stat_sessions[i]; + reset_stat_session(session); + mutex_destroy(&session->stat_mutex); + kfree(session); + } + } + all_stat_sessions = kmalloc(sizeof(struct tracer_stat_session *) * nb, + GFP_KERNEL); + if (!all_stat_sessions) + return -ENOMEM; + + for (i = 0; i < nb; i++) { + session = kmalloc(sizeof(struct tracer_stat_session) * nb, + GFP_KERNEL); + if (!session) + goto free_sessions; + + INIT_LIST_HEAD(&session->stat_list); + mutex_init(&session->stat_mutex); + session->ts = &ts[i]; + all_stat_sessions[i] = session; + } + nb_sessions = nb; + return 0; + +free_sessions: + + for (j = 0; j < i; j++) + kfree(all_stat_sessions[i]); + + kfree(all_stat_sessions); + all_stat_sessions = NULL; + + return -ENOMEM; +} + +static int basic_tracer_stat_checks(struct tracer_stat *ts) +{ + int i; + + if (!ts) + return 0; + + for (i = 0; ts[i].name; i++) { + if (!ts[i].stat_start || !ts[i].stat_next || !ts[i].stat_show) + return -EBUSY; + } + return i; } /* @@ -69,22 +119,19 @@ static int dummy_cmp(void *p1, void *p2) * All of these copies and sorting are required on all opening * 
since the stats could have changed between two file sessions. */ -static int stat_seq_init(void) +static int stat_seq_init(struct tracer_stat_session *session) { struct trace_stat_list *iter_entry, *new_entry; + struct tracer_stat *ts = session->ts; void *prev_stat; int ret = 0; int i; - mutex_lock(&stat_list_mutex); - reset_stat_list(); - - if (!current_tracer.stat_start || !current_tracer.stat_next || - !current_tracer.stat_show) - goto exit; + mutex_lock(&session->stat_mutex); + reset_stat_session(session); - if (!current_tracer.stat_cmp) - current_tracer.stat_cmp = dummy_cmp; + if (!ts->stat_cmp) + ts->stat_cmp = dummy_cmp; /* * The first entry. Actually this is the second, but the first @@ -97,9 +144,10 @@ static int stat_seq_init(void) } INIT_LIST_HEAD(&new_entry->list); - list_add(&new_entry->list, &stat_list); - new_entry->stat = current_tracer.stat_start(); + list_add(&new_entry->list, &session->stat_list); + + new_entry->stat = ts->stat_start(); prev_stat = new_entry->stat; /* @@ -114,15 +162,16 @@ static int stat_seq_init(void) } INIT_LIST_HEAD(&new_entry->list); - new_entry->stat = current_tracer.stat_next(prev_stat, i); + new_entry->stat = ts->stat_next(prev_stat, i); /* End of insertion */ if (!new_entry->stat) break; - list_for_each_entry(iter_entry, &stat_list, list) { + list_for_each_entry(iter_entry, &session->stat_list, list) { + /* Insertion with a descendent sorting */ - if (current_tracer.stat_cmp(new_entry->stat, + if (ts->stat_cmp(new_entry->stat, iter_entry->stat) > 0) { list_add_tail(&new_entry->list, @@ -131,7 +180,7 @@ static int stat_seq_init(void) /* The current smaller value */ } else if (list_is_last(&iter_entry->list, - &stat_list)) { + &session->stat_list)) { list_add(&new_entry->list, &iter_entry->list); break; } @@ -140,49 +189,49 @@ static int stat_seq_init(void) prev_stat = new_entry->stat; } exit: - mutex_unlock(&stat_list_mutex); + mutex_unlock(&session->stat_mutex); return ret; exit_free_list: - reset_stat_list(); - 
mutex_unlock(&stat_list_mutex); + reset_stat_session(session); + mutex_unlock(&session->stat_mutex); return ret; } static void *stat_seq_start(struct seq_file *s, loff_t *pos) { - struct list_head *l = (struct list_head *)s->private; + struct tracer_stat_session *session = s->private; /* Prevent from tracer switch or stat_list modification */ - mutex_lock(&stat_list_mutex); + mutex_lock(&session->stat_mutex); /* If we are in the beginning of the file, print the headers */ - if (!*pos && current_tracer.stat_headers) - current_tracer.stat_headers(s); + if (!*pos && session->ts->stat_headers) + session->ts->stat_headers(s); - return seq_list_start(l, *pos); + return seq_list_start(&session->stat_list, *pos); } static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) { - struct list_head *l = (struct list_head *)s->private; + struct tracer_stat_session *session = s->private; - return seq_list_next(p, l, pos); + return seq_list_next(p, &session->stat_list, pos); } -static void stat_seq_stop(struct seq_file *m, void *p) +static void stat_seq_stop(struct seq_file *s, void *p) { - mutex_unlock(&stat_list_mutex); + struct tracer_stat_session *session = s->private; + mutex_unlock(&session->stat_mutex); } static int stat_seq_show(struct seq_file *s, void *v) { - struct trace_stat_list *entry; - - entry = list_entry(v, struct trace_stat_list, list); + struct tracer_stat_session *session = s->private; + struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); - return current_tracer.stat_show(s, entry->stat); + return session->ts->stat_show(s, l->stat); } static const struct seq_operations trace_stat_seq_ops = { @@ -192,15 +241,18 @@ static const struct seq_operations trace_stat_seq_ops = { .show = stat_seq_show }; +/* The session stat is refilled and resorted at each stat file opening */ static int tracing_stat_open(struct inode *inode, struct file *file) { int ret; + struct tracer_stat_session *session = inode->i_private; + ret = seq_open(file, 
&trace_stat_seq_ops); if (!ret) { struct seq_file *m = file->private_data; - m->private = &stat_list; - ret = stat_seq_init(); + m->private = session; + ret = stat_seq_init(session); } return ret; @@ -212,9 +264,12 @@ static int tracing_stat_open(struct inode *inode, struct file *file) */ static int tracing_stat_release(struct inode *i, struct file *f) { - mutex_lock(&stat_list_mutex); - reset_stat_list(); - mutex_unlock(&stat_list_mutex); + struct tracer_stat_session *session = i->i_private; + + mutex_lock(&session->stat_mutex); + reset_stat_session(session); + mutex_unlock(&session->stat_mutex); + return 0; } @@ -225,17 +280,70 @@ static const struct file_operations tracing_stat_fops = { .release = tracing_stat_release }; + +static void destroy_trace_stat_files(void) +{ + int i; + + if (stat_files) { + for (i = 0; i < nb_sessions; i++) + debugfs_remove(stat_files[i]); + kfree(stat_files); + stat_files = NULL; + } +} + +static void init_trace_stat_files(void) +{ + int i; + + if (!stat_dir || !nb_sessions) + return; + + stat_files = kmalloc(sizeof(struct dentry *) * nb_sessions, GFP_KERNEL); + + if (!stat_files) { + pr_warning("trace stat: not enough memory\n"); + return; + } + + for (i = 0; i < nb_sessions; i++) { + struct tracer_stat_session *session = all_stat_sessions[i]; + stat_files[i] = debugfs_create_file(session->ts->name, 0644, + stat_dir, + session, &tracing_stat_fops); + if (!stat_files[i]) + pr_warning("cannot create %s entry\n", + session->ts->name); + } +} + +void init_tracer_stat(struct tracer *trace) +{ + int nb = basic_tracer_stat_checks(trace->stats); + + destroy_trace_stat_files(); + + if (nb < 0) { + pr_warning("stat tracing: missing stat callback on %s\n", + trace->name); + return; + } + if (!nb) + return; + + init_all_sessions(nb, trace->stats); + init_trace_stat_files(); +} + static int __init tracing_stat_init(void) { struct dentry *d_tracing; - struct dentry *entry; d_tracing = tracing_init_dentry(); - entry = 
debugfs_create_file("trace_stat", 0444, d_tracing, - NULL, - &tracing_stat_fops); - if (!entry) + stat_dir = debugfs_create_dir("trace_stat", d_tracing); + if (!stat_dir) pr_warning("Could not create debugfs " "'trace_stat' entry\n"); return 0; -- cgit v1.2.3-70-g09d2 From 002bb86d8d42f18937aef396c3ecd65c7e02e21a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 10 Jan 2009 11:34:13 -0800 Subject: tracing/ftrace: separate events tracing and stats tracing engine Impact: tracing's Api change Currently, the stat tracing depends on the events tracing. When you switch to a new tracer, the stats files of the previous tracer will disappear. But it's more scalable to separate those two engines. This way, we can keep the stat files of one or several tracers when we want, without bothering of multiple tracer stat files or tracer switching. To build/destroys its stats files, a tracer just have to call register_stat_tracer/unregister_stat_tracer everytimes it wants to. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 - kernel/trace/trace.h | 20 ----- kernel/trace/trace_branch.c | 108 ++++++++++++++----------- kernel/trace/trace_stat.c | 191 +++++++++++++++++++------------------------- kernel/trace/trace_stat.h | 31 +++++++ 5 files changed, 172 insertions(+), 180 deletions(-) create mode 100644 kernel/trace/trace_stat.h (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0418fc338b5..40217fb499e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2353,7 +2353,6 @@ static int tracing_set_tracer(char *buf) if (ret) goto out; } - init_tracer_stat(t); trace_branch_enable(tr); out: @@ -3218,7 +3217,6 @@ __init static int tracer_alloc_buffers(void) #else current_trace = &nop_trace; #endif - init_tracer_stat(current_trace); /* All seems OK, enable tracing */ tracing_disabled = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 
b3f9ad1b4d8..79c872100dd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,24 +334,6 @@ struct tracer_flags { /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b -/* - * If you want to provide a stat file (one-shot statistics), fill - * an iterator with stat_start/stat_next and a stat_show callbacks. - * The others callbacks are optional. - */ -struct tracer_stat { - /* The name of your stat file */ - const char *name; - /* Iteration over statistic entries */ - void *(*stat_start)(void); - void *(*stat_next)(void *prev, int idx); - /* Compare two entries for sorting (optional) for stats */ - int (*stat_cmp)(void *p1, void *p2); - /* Print a stat entry */ - int (*stat_show)(struct seq_file *s, void *p); - /* Print the headers of your stat entries */ - int (*stat_headers)(struct seq_file *s); -}; /* * A specific tracer, represented by methods that operate on a trace array: @@ -466,8 +448,6 @@ void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); -void init_tracer_stat(struct tracer *trace); - extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index da5cf3e5581..ca017e0a9a2 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -16,12 +16,12 @@ #include #include "trace.h" +#include "trace_stat.h" #include "trace_output.h" -static struct tracer branch_trace; - #ifdef CONFIG_BRANCH_TRACER +static struct tracer branch_trace; static int branch_tracing_enabled __read_mostly; static DEFINE_MUTEX(branch_tracing_mutex); @@ -191,6 +191,30 @@ static struct trace_event trace_branch_event = { .binary = trace_nop_print, }; +static struct tracer branch_trace __read_mostly = +{ + .name = "branch", + .init = branch_trace_init, + .reset = branch_trace_reset, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = 
trace_selftest_startup_branch, +#endif /* CONFIG_FTRACE_SELFTEST */ +}; + +__init static int init_branch_tracer(void) +{ + int ret; + + ret = register_ftrace_event(&trace_branch_event); + if (!ret) { + printk(KERN_WARNING "Warning: could not register " + "branch events\n"); + return 1; + } + return register_tracer(&branch_trace); +} +device_initcall(init_branch_tracer); + #else static inline void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) @@ -305,6 +329,29 @@ static int annotated_branch_stat_cmp(void *p1, void *p2) return 0; } +static struct tracer_stat annotated_branch_stats = { + .name = "branch_annotated", + .stat_start = annotated_branch_stat_start, + .stat_next = annotated_branch_stat_next, + .stat_cmp = annotated_branch_stat_cmp, + .stat_headers = annotated_branch_stat_headers, + .stat_show = branch_stat_show +}; + +__init static int init_annotated_branch_stats(void) +{ + int ret; + + ret = register_stat_tracer(&annotated_branch_stats); + if (!ret) { + printk(KERN_WARNING "Warning: could not register " + "annotated branches stats\n"); + return 1; + } + return 0; +} +fs_initcall(init_annotated_branch_stats); + #ifdef CONFIG_PROFILE_ALL_BRANCHES extern unsigned long __start_branch_profile[]; @@ -339,60 +386,25 @@ all_branch_stat_next(void *v, int idx) return p; } -static struct tracer_stat branch_stats[] = { - {.name = "annotated", - .stat_start = annotated_branch_stat_start, - .stat_next = annotated_branch_stat_next, - .stat_cmp = annotated_branch_stat_cmp, - .stat_headers = annotated_branch_stat_headers, - .stat_show = branch_stat_show}, - - {.name = "all", +static struct tracer_stat all_branch_stats = { + .name = "branch_all", .stat_start = all_branch_stat_start, .stat_next = all_branch_stat_next, .stat_headers = all_branch_stat_headers, - .stat_show = branch_stat_show}, - - { } -}; -#else -static struct tracer_stat branch_stats[] = { - {.name = "annotated", - .stat_start = annotated_branch_stat_start, - .stat_next = 
annotated_branch_stat_next, - .stat_cmp = annotated_branch_stat_cmp, - .stat_headers = annotated_branch_stat_headers, - .stat_show = branch_stat_show}, - - { } + .stat_show = branch_stat_show }; -#endif /* CONFIG_PROFILE_ALL_BRANCHES */ - -static struct tracer branch_trace __read_mostly = +__init static int all_annotated_branch_stats(void) { - .name = "branch", -#ifdef CONFIG_BRANCH_TRACER - .init = branch_trace_init, - .reset = branch_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_branch, -#endif /* CONFIG_FTRACE_SELFTEST */ -#endif - .stats = branch_stats -}; - -__init static int init_branch_trace(void) -{ -#ifdef CONFIG_BRANCH_TRACER int ret; - ret = register_ftrace_event(&trace_branch_event); + + ret = register_stat_tracer(&all_branch_stats); if (!ret) { - printk(KERN_WARNING "Warning: could not register branch events\n"); + printk(KERN_WARNING "Warning: could not register " + "all branches stats\n"); return 1; } -#endif - - return register_tracer(&branch_trace); + return 0; } -device_initcall(init_branch_trace); +fs_initcall(all_annotated_branch_stats); +#endif /* CONFIG_PROFILE_ALL_BRANCHES */ diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 1515f9e7adf..cb29282b948 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -10,28 +10,32 @@ #include -#include #include +#include "trace_stat.h" #include "trace.h" /* List of stat entries from a tracer */ struct trace_stat_list { - struct list_head list; - void *stat; + struct list_head list; + void *stat; }; /* A stat session is the stats output in one file */ struct tracer_stat_session { - struct tracer_stat *ts; - struct list_head stat_list; - struct mutex stat_mutex; + struct list_head session_list; + struct tracer_stat *ts; + struct list_head stat_list; + struct mutex stat_mutex; + struct dentry *file; }; /* All of the sessions currently in use. 
Each stat file embeed one session */ -static struct tracer_stat_session **all_stat_sessions; -static int nb_sessions; -static struct dentry *stat_dir, **stat_files; +static LIST_HEAD(all_stat_sessions); +static DEFINE_MUTEX(all_stat_sessions_mutex); + +/* The root directory for all stat files */ +static struct dentry *stat_dir; static void reset_stat_session(struct tracer_stat_session *session) @@ -44,66 +48,77 @@ static void reset_stat_session(struct tracer_stat_session *session) INIT_LIST_HEAD(&session->stat_list); } -/* Called when a tracer is initialized */ -static int init_all_sessions(int nb, struct tracer_stat *ts) +static void destroy_session(struct tracer_stat_session *session) { - int i, j; - struct tracer_stat_session *session; + debugfs_remove(session->file); + reset_stat_session(session); + mutex_destroy(&session->stat_mutex); + kfree(session); +} - nb_sessions = 0; - if (all_stat_sessions) { - for (i = 0; i < nb_sessions; i++) { - session = all_stat_sessions[i]; - reset_stat_session(session); - mutex_destroy(&session->stat_mutex); - kfree(session); - } - } - all_stat_sessions = kmalloc(sizeof(struct tracer_stat_session *) * nb, - GFP_KERNEL); - if (!all_stat_sessions) - return -ENOMEM; +static int init_stat_file(struct tracer_stat_session *session); - for (i = 0; i < nb; i++) { - session = kmalloc(sizeof(struct tracer_stat_session) * nb, - GFP_KERNEL); - if (!session) - goto free_sessions; +int register_stat_tracer(struct tracer_stat *trace) +{ + struct tracer_stat_session *session, *node, *tmp; + int ret; + + if (!trace) + return -EINVAL; + + if (!trace->stat_start || !trace->stat_next || !trace->stat_show) + return -EINVAL; - INIT_LIST_HEAD(&session->stat_list); - mutex_init(&session->stat_mutex); - session->ts = &ts[i]; - all_stat_sessions[i] = session; + /* Already registered? 
*/ + mutex_lock(&all_stat_sessions_mutex); + list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { + if (node->ts == trace) + return -EINVAL; } - nb_sessions = nb; - return 0; + mutex_unlock(&all_stat_sessions_mutex); + + /* Init the session */ + session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL); + if (!session) + return -ENOMEM; -free_sessions: + session->ts = trace; + INIT_LIST_HEAD(&session->session_list); + INIT_LIST_HEAD(&session->stat_list); + mutex_init(&session->stat_mutex); + session->file = NULL; - for (j = 0; j < i; j++) - kfree(all_stat_sessions[i]); + ret = init_stat_file(session); + if (ret) { + destroy_session(session); + return ret; + } - kfree(all_stat_sessions); - all_stat_sessions = NULL; + /* Register */ + mutex_lock(&all_stat_sessions_mutex); + list_add_tail(&session->session_list, &all_stat_sessions); + mutex_unlock(&all_stat_sessions_mutex); - return -ENOMEM; + return 0; } -static int basic_tracer_stat_checks(struct tracer_stat *ts) +void unregister_stat_tracer(struct tracer_stat *trace) { - int i; + struct tracer_stat_session *node, *tmp; - if (!ts) - return 0; - - for (i = 0; ts[i].name; i++) { - if (!ts[i].stat_start || !ts[i].stat_next || !ts[i].stat_show) - return -EBUSY; + mutex_lock(&all_stat_sessions_mutex); + list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { + if (node->ts == trace) { + list_del(&node->session_list); + destroy_session(node); + break; + } } - return i; + mutex_unlock(&all_stat_sessions_mutex); } + /* * For tracers that don't provide a stat_cmp callback. 
* This one will force an immediate insertion on tail of @@ -280,63 +295,7 @@ static const struct file_operations tracing_stat_fops = { .release = tracing_stat_release }; - -static void destroy_trace_stat_files(void) -{ - int i; - - if (stat_files) { - for (i = 0; i < nb_sessions; i++) - debugfs_remove(stat_files[i]); - kfree(stat_files); - stat_files = NULL; - } -} - -static void init_trace_stat_files(void) -{ - int i; - - if (!stat_dir || !nb_sessions) - return; - - stat_files = kmalloc(sizeof(struct dentry *) * nb_sessions, GFP_KERNEL); - - if (!stat_files) { - pr_warning("trace stat: not enough memory\n"); - return; - } - - for (i = 0; i < nb_sessions; i++) { - struct tracer_stat_session *session = all_stat_sessions[i]; - stat_files[i] = debugfs_create_file(session->ts->name, 0644, - stat_dir, - session, &tracing_stat_fops); - if (!stat_files[i]) - pr_warning("cannot create %s entry\n", - session->ts->name); - } -} - -void init_tracer_stat(struct tracer *trace) -{ - int nb = basic_tracer_stat_checks(trace->stats); - - destroy_trace_stat_files(); - - if (nb < 0) { - pr_warning("stat tracing: missing stat callback on %s\n", - trace->name); - return; - } - if (!nb) - return; - - init_all_sessions(nb, trace->stats); - init_trace_stat_files(); -} - -static int __init tracing_stat_init(void) +static int tracing_stat_init(void) { struct dentry *d_tracing; @@ -348,4 +307,16 @@ static int __init tracing_stat_init(void) "'trace_stat' entry\n"); return 0; } -fs_initcall(tracing_stat_init); + +static int init_stat_file(struct tracer_stat_session *session) +{ + if (!stat_dir && tracing_stat_init()) + return -ENODEV; + + session->file = debugfs_create_file(session->ts->name, 0644, + stat_dir, + session, &tracing_stat_fops); + if (!session->file) + return -ENOMEM; + return 0; +} diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h new file mode 100644 index 00000000000..202274cf7f3 --- /dev/null +++ b/kernel/trace/trace_stat.h @@ -0,0 +1,31 @@ +#ifndef 
__TRACE_STAT_H +#define __TRACE_STAT_H + +#include + +/* + * If you want to provide a stat file (one-shot statistics), fill + * an iterator with stat_start/stat_next and a stat_show callbacks. + * The others callbacks are optional. + */ +struct tracer_stat { + /* The name of your stat file */ + const char *name; + /* Iteration over statistic entries */ + void *(*stat_start)(void); + void *(*stat_next)(void *prev, int idx); + /* Compare two entries for stats sorting */ + int (*stat_cmp)(void *p1, void *p2); + /* Print a stat entry */ + int (*stat_show)(struct seq_file *s, void *p); + /* Print the headers of your stat entries */ + int (*stat_headers)(struct seq_file *s); +}; + +/* + * Destroy or create a stat file + */ +extern int register_stat_tracer(struct tracer_stat *trace); +extern void unregister_stat_tracer(struct tracer_stat *trace); + +#endif /* __TRACE_STAT_H */ -- cgit v1.2.3-70-g09d2 From 5361499101306cfb776c3cfa0f69d0479bc63868 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 19:12:40 -0500 Subject: ftrace: add stack trace to function tracer Impact: new feature to stack trace any function Chris Mason asked about being able to pick and choose a function and get a stack trace from it. This feature enables his request. # echo io_schedule > /debug/tracing/set_ftrace_filter # echo function > /debug/tracing/current_tracer # echo func_stack_trace > /debug/tracing/trace_options Produces the following in /debug/tracing/trace: kjournald-702 [001] 135.673060: io_schedule <-sync_buffer kjournald-702 [002] 135.673671: <= sync_buffer <= __wait_on_bit <= out_of_line_wait_on_bit <= __wait_on_buffer <= sync_dirty_buffer <= journal_commit_transaction <= kjournald Note, be careful about turning this on without filtering the functions. You may find that you have a 10 second lag between typing and seeing what you typed. This is why the stack trace for the function tracer does not use the same stack_trace flag as the other tracers use. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 26 ++++++++----- kernel/trace/trace.h | 7 ++++ kernel/trace/trace_functions.c | 84 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dcb757f70d2..3c54cb12522 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -835,10 +835,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, trace_function(tr, data, ip, parent_ip, flags, pc); } -static void ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, - int skip, int pc) +static void __ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) { #ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; @@ -846,9 +846,6 @@ static void ftrace_trace_stack(struct trace_array *tr, struct stack_trace trace; unsigned long irq_flags; - if (!(trace_flags & TRACE_ITER_STACKTRACE)) - return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); if (!event) @@ -869,12 +866,23 @@ static void ftrace_trace_stack(struct trace_array *tr, #endif } +static void ftrace_trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc) +{ + if (!(trace_flags & TRACE_ITER_STACKTRACE)) + return; + + __ftrace_trace_stack(tr, data, flags, skip, pc); +} + void __trace_stack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, - int skip) + int skip, int pc) { - ftrace_trace_stack(tr, data, flags, skip, preempt_count()); + __ftrace_trace_stack(tr, data, flags, skip, pc); } static void ftrace_trace_userstack(struct trace_array *tr, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 79c872100dd..bf39a369e4b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -457,6 +457,11 @@ void 
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc); + extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_FUNCTION_TRACER @@ -467,6 +472,8 @@ void tracing_stop_function_trace(void); # define tracing_stop_function_trace() do { } while (0) #endif +extern int ftrace_function_enabled; + #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9236d7e25a1..3a5fa08cedb 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -16,6 +16,8 @@ #include "trace.h" +static struct trace_array *func_trace; + static void start_function_trace(struct trace_array *tr) { tr->cpu = get_cpu(); @@ -34,6 +36,7 @@ static void stop_function_trace(struct trace_array *tr) static int function_trace_init(struct trace_array *tr) { + func_trace = tr; start_function_trace(tr); return 0; } @@ -48,12 +51,93 @@ static void function_trace_start(struct trace_array *tr) tracing_reset_online_cpus(tr); } +static void +function_stack_trace_call(unsigned long ip, unsigned long parent_ip) +{ + struct trace_array *tr = func_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + if (unlikely(!ftrace_function_enabled)) + return; + + /* + * Need to use raw, since this must be called before the + * recursive protection is performed. 
+ */ + local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { + pc = preempt_count(); + /* + * skip over 5 funcs: + * __ftrace_trace_stack, + * __trace_stack, + * function_stack_trace_call + * ftrace_list_func + * ftrace_call + */ + __trace_stack(tr, data, flags, 5, pc); + } + + atomic_dec(&data->disabled); + local_irq_restore(flags); +} + +static struct ftrace_ops trace_stack_ops __read_mostly = +{ + .func = function_stack_trace_call, +}; + +/* Our two options */ +enum { + TRACE_FUNC_OPT_STACK = 0x1, +}; + +static struct tracer_opt func_opts[] = { +#ifdef CONFIG_STACKTRACE + { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, +#endif + { } /* Always set a last empty entry */ +}; + +static struct tracer_flags func_flags = { + .val = 0, /* By default: all flags disabled */ + .opts = func_opts +}; + +static int func_set_flag(u32 old_flags, u32 bit, int set) +{ + if (bit == TRACE_FUNC_OPT_STACK) { + /* do nothing if already set */ + if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) + return 0; + + if (set) + register_ftrace_function(&trace_stack_ops); + else + unregister_ftrace_function(&trace_stack_ops); + + return 0; + } + + return -EINVAL; +} + static struct tracer function_trace __read_mostly = { .name = "function", .init = function_trace_init, .reset = function_trace_reset, .start = function_trace_start, + .flags = &func_flags, + .set_flag = func_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function, #endif -- cgit v1.2.3-70-g09d2 From a225cdd263f340c864febb1992802fb5b08bc328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:06:03 -0500 Subject: ftrace: remove static from function tracer functions Impact: clean up After reorganizing the functions in trace.c and trace_function.c, they no longer need to be in global context. This patch makes the functions and one variable into static. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 --- kernel/trace/trace.h | 10 ---------- kernel/trace/trace_functions.c | 10 ++++++++-- 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2585ffb6c6b..7de6a94063d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -187,9 +187,6 @@ int tracing_is_enabled(void) return tracer_enabled; } -/* function tracing enabled */ -int ftrace_function_enabled; - /* * trace_buf_size is the size in bytes that is allocated * for a buffer. Note, the number of bytes is always rounded diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index bf39a369e4b..54b72781e92 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -464,16 +464,6 @@ void __trace_stack(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_FUNCTION_TRACER -void tracing_start_function_trace(void); -void tracing_stop_function_trace(void); -#else -# define tracing_start_function_trace() do { } while (0) -# define tracing_stop_function_trace() do { } while (0) -#endif - -extern int ftrace_function_enabled; - #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 61d0b73dabf..b3a320f8aba 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -16,8 +16,14 @@ #include "trace.h" +/* function tracing enabled */ +static int ftrace_function_enabled; + static struct trace_array *func_trace; +static void tracing_start_function_trace(void); +static void tracing_stop_function_trace(void); + static void start_function_trace(struct trace_array *tr) { func_trace = tr; @@ -177,7 +183,7 @@ static struct tracer_flags func_flags = { .opts = func_opts }; -void tracing_start_function_trace(void) +static void tracing_start_function_trace(void) { ftrace_function_enabled = 
0; @@ -194,7 +200,7 @@ void tracing_start_function_trace(void) ftrace_function_enabled = 1; } -void tracing_stop_function_trace(void) +static void tracing_stop_function_trace(void) { ftrace_function_enabled = 0; /* OK if they are not registered */ -- cgit v1.2.3-70-g09d2 From b1818748b0cf9427e48acf9713295e829a0d715f Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:31:01 +0100 Subject: x86, ftrace, hw-branch-tracer: dump trace on oops Dump the branch trace on an oops (based on ftrace_dump_on_oops). Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 6 ++++++ include/linux/ftrace.h | 13 +++++++++++++ kernel/trace/trace.h | 1 - kernel/trace/trace_hw_branches.c | 29 ++++++++++++++++++++++------- 4 files changed, 41 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f866..077c9ea655f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; + /* notify the hw-branch tracer so it may disable tracing and + add the last trace to the trace buffer - + the earlier this happens, the more useful the trace. */ + trace_hw_branch_oops(); + oops_enter(); /* racy, but better than risking deadlock. 
*/ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 05472148757..9f7880d87c3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -496,4 +496,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) #endif /* CONFIG_TRACING */ + +#ifdef CONFIG_HW_BRANCH_TRACER + +void trace_hw_branch(u64 from, u64 to); +void trace_hw_branch_oops(void); + +#else /* CONFIG_HW_BRANCH_TRACER */ + +static inline void trace_hw_branch(u64 from, u64 to) {} +static inline void trace_hw_branch_oops(void) {} + +#endif /* CONFIG_HW_BRANCH_TRACER */ + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54b72781e92..b96037d970d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 398195397c7..e56df2c7d67 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -40,6 +40,7 @@ static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); #define this_buffer per_cpu(buffer, smp_processor_id()) static int __read_mostly trace_hw_branches_enabled; +static struct trace_array *hw_branch_trace __read_mostly; /* @@ -128,6 +129,8 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { static int bts_trace_init(struct trace_array *tr) { + hw_branch_trace = tr; + register_hotcpu_notifier(&bts_hotcpu_notifier); tracing_reset_online_cpus(tr); bts_trace_start(tr); @@ -170,8 +173,9 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) return TRACE_TYPE_UNHANDLED; } -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) +void trace_hw_branch(u64 
from, u64 to) { + struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; unsigned long irq1, irq2; @@ -204,8 +208,7 @@ void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) local_irq_restore(irq1); } -static void trace_bts_at(struct trace_array *tr, - const struct bts_trace *trace, void *at) +static void trace_bts_at(const struct bts_trace *trace, void *at) { struct bts_struct bts; int err = 0; @@ -220,7 +223,7 @@ static void trace_bts_at(struct trace_array *tr, switch (bts.qualifier) { case BTS_BRANCH: - trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); + trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); break; } } @@ -236,12 +239,15 @@ static void trace_bts_cpu(void *arg) const struct bts_trace *trace; unsigned char *at; - if (!this_tracer) + if (unlikely(!tr)) return; if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) return; + if (unlikely(!this_tracer)) + return; + ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) @@ -249,11 +255,11 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); out: ds_resume_bts(this_tracer); @@ -268,6 +274,15 @@ static void trace_bts_prepare(struct trace_iterator *iter) mutex_unlock(&bts_tracer_mutex); } +void trace_hw_branch_oops(void) +{ + mutex_lock(&bts_tracer_mutex); + + trace_bts_cpu(hw_branch_trace); + + mutex_unlock(&bts_tracer_mutex); +} + struct tracer bts_tracer __read_mostly = { .name = "hw-branch-tracer", -- cgit v1.2.3-70-g09d2 From c71a896154119f4ca9e89d6078f5f63ad60ef199 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 Jan 2009 12:06:27 -0200 Subject: blktrace: add ftrace plugin Impact: New way of using the blktrace 
infrastructure This drops the requirement of userspace utilities to use the blktrace facility. Configuration is done through sysfs, adding a "trace" directory to the partition directory where blktrace can be enabled for the associated request_queue. The same filters present in the IOCTL interface are present as sysfs device attributes. The /sys/block/sdX/sdXN/trace/enable file allows tracing without any filters. The other files in this directory: pid, act_mask, start_lba and end_lba can be used with the same meaning as with the IOCTL interface. Using the sysfs interface will only set up the request_queue->blk_trace fields, tracing will only take place when the "blk" tracer is selected via the ftrace interface, as in the following example: To see the trace, one can use the /d/tracing/trace file or the /d/tracing/trace_pipe file, with semantics defined in the ftrace documentation in Documentation/ftrace.txt. [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3046.491224: 8,1 A WBS 6367 + 8 <- (8,1) 6304 kjournald-305 [000] 3046.491227: 8,1 Q R 6367 + 8 [kjournald] kjournald-305 [000] 3046.491236: 8,1 G RB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491239: 8,1 P NS [kjournald] kjournald-305 [000] 3046.491242: 8,1 I RBS 6367 + 8 [kjournald] kjournald-305 [000] 3046.491251: 8,1 D WB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491610: 8,1 U WS [kjournald] 1 <idle>-0 [000] 3046.511914: 8,1 C RS 6367 + 8 [6367] [root@f10-1 ~]# The default line context (prefix) format is the one described in the ftrace documentation, with the blktrace specific bits using its existing format, described in blkparse(8).
If one wants to have the classic blktrace formatting, this is possible by using: [root@f10-1 ~]# echo blk_classic > /t/trace_options [root@f10-1 ~]# cat /t/trace 8,1 0 3046.491224 305 A WBS 6367 + 8 <- (8,1) 6304 8,1 0 3046.491227 305 Q R 6367 + 8 [kjournald] 8,1 0 3046.491236 305 G RB 6367 + 8 [kjournald] 8,1 0 3046.491239 305 P NS [kjournald] 8,1 0 3046.491242 305 I RBS 6367 + 8 [kjournald] 8,1 0 3046.491251 305 D WB 6367 + 8 [kjournald] 8,1 0 3046.491610 305 U WS [kjournald] 1 8,1 0 3046.511914 0 C RS 6367 + 8 [6367] [root@f10-1 ~]# Using the ftrace standard format allows more flexibility, such as the ability to ask for backtraces via trace_options: [root@f10-1 ~]# echo noblk_classic > /t/trace_options [root@f10-1 ~]# echo stacktrace > /t/trace_options [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3318.826779: 8,1 A WBS 6375 + 8 <- (8,1) 6312 kjournald-305 [000] 3318.826782: <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread <= child_rip kjournald-305 [000] 3318.826836: 8,1 Q R 6375 + 8 [kjournald] kjournald-305 [000] 3318.826837: <= generic_make_request <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread Please read the ftrace documentation to use additional, standardized tracing filters such as /d/tracing/trace_cpumask, etc. See also /d/tracing/trace_mark to add comments in the trace stream, which is equivalent to the /d/block/sdaN/msg interface.
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- block/blktrace.c | 651 +++++++++++++++++++++++++++++++++++++++++++++++++- fs/partitions/check.c | 7 + kernel/trace/trace.h | 1 + 3 files changed, 654 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/block/blktrace.c b/block/blktrace.c index b0a2cae886d..630f167f824 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -25,9 +25,27 @@ #include #include #include +#include <../kernel/trace/trace_output.h> static unsigned int blktrace_seq __read_mostly = 1; +static struct trace_array *blk_tr; +static int __read_mostly blk_tracer_enabled; + +/* Select an alternative, minimalistic output than the original one */ +#define TRACE_BLK_OPT_CLASSIC 0x1 + +static struct tracer_opt blk_tracer_opts[] = { + /* Default disable the minimalistic output */ + { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC ) }, + { } +}; + +static struct tracer_flags blk_tracer_flags = { + .val = 0, + .opts = blk_tracer_opts, +}; + /* Global reference count of probes */ static DEFINE_MUTEX(blk_probe_mutex); static atomic_t blk_probes_ref = ATOMIC_INIT(0); @@ -43,6 +61,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, { struct blk_io_trace *t; + if (!bt->rchan) + return; + t = relay_reserve(bt->rchan, sizeof(*t) + len); if (t) { const int cpu = smp_processor_id(); @@ -90,6 +111,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) 
unsigned long flags; char *buf; + if (blk_tr) { + va_start(args, fmt); + ftrace_vprintk(fmt, args); + va_end(args); + return; + } + + if (!bt->msg_data) + return; + local_irq_save(flags); buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); va_start(args, fmt); @@ -131,13 +162,14 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int rw, u32 what, int error, int pdu_len, void *pdu_data) { struct task_struct *tsk = current; + struct ring_buffer_event *event = NULL; struct blk_io_trace *t; unsigned long flags; unsigned long *sequence; pid_t pid; - int cpu; + int cpu, pc = 0; - if (unlikely(bt->trace_state != Blktrace_running)) + if (unlikely(bt->trace_state != Blktrace_running || !blk_tracer_enabled)) return; what |= ddir_act[rw & WRITE]; @@ -150,6 +182,24 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) return; + cpu = raw_smp_processor_id(); + + if (blk_tr) { + struct trace_entry *ent; + tracing_record_cmdline(current); + + event = ring_buffer_lock_reserve(blk_tr->buffer, + sizeof(*t) + pdu_len, &flags); + if (!event) + return; + + ent = ring_buffer_event_data(event); + t = (struct blk_io_trace *)ent; + pc = preempt_count(); + tracing_generic_entry_update(ent, 0, pc); + ent->type = TRACE_BLK; + goto record_it; + } /* * A word about the locking here - we disable interrupts to reserve @@ -163,23 +213,33 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); if (t) { - cpu = smp_processor_id(); sequence = per_cpu_ptr(bt->sequence, cpu); t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; t->sequence = ++(*sequence); t->time = ktime_to_ns(ktime_get()); + t->cpu = cpu; + t->pid = pid; +record_it: t->sector = sector; t->bytes = bytes; t->action = what; - t->pid = pid; t->device = bt->dev; - t->cpu = cpu; t->error = error; t->pdu_len = pdu_len; if (pdu_len) 
memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); + + if (blk_tr) { + ring_buffer_unlock_commit(blk_tr->buffer, event, flags); + if (pid != 0 && + (blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) == 0 && + (trace_flags & TRACE_ITER_STACKTRACE) != 0) + __trace_stack(blk_tr, NULL, flags, 5, pc); + trace_wake_up(); + return; + } } local_irq_restore(flags); @@ -888,3 +948,584 @@ static void blk_unregister_tracepoints(void) tracepoint_synchronize_unregister(); } + +/* + * struct blk_io_tracer formatting routines + */ + +static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) +{ + int i = 0; + + if (t->action & BLK_TC_DISCARD) rwbs[i++] = 'D'; + else if (t->action & BLK_TC_WRITE) rwbs[i++] = 'W'; + else if (t->bytes) rwbs[i++] = 'R'; + else rwbs[i++] = 'N'; + + if (t->action & BLK_TC_AHEAD) rwbs[i++] = 'A'; + if (t->action & BLK_TC_BARRIER) rwbs[i++] = 'B'; + if (t->action & BLK_TC_SYNC) rwbs[i++] = 'S'; + if (t->action & BLK_TC_META) rwbs[i++] = 'M'; + + rwbs[i] = '\0'; +} + +static inline +const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) +{ + return (const struct blk_io_trace *)ent; +} + +static inline const void *pdu_start(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent) + 1; +} + +static inline u32 t_sec(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->bytes >> 9; +} + +static inline unsigned long long t_sector(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->sector; +} + +static inline __u16 t_error(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->sector; +} + +static __u64 get_pdu_int(const struct trace_entry *ent) +{ + const __u64 *val = pdu_start(ent); + return be64_to_cpu(*val); +} + +static void get_pdu_remap(const struct trace_entry *ent, + struct blk_io_trace_remap *r) +{ + const struct blk_io_trace_remap *__r = pdu_start(ent); + __u64 sector = __r->sector; + + r->device = be32_to_cpu(__r->device); + r->device_from = be32_to_cpu(__r->device_from); + r->sector = 
be64_to_cpu(sector); +} + +static int blk_log_action_iter(struct trace_iterator *iter, const char *act) +{ + char rwbs[6]; + unsigned long long ts = ns2usecs(iter->ts); + unsigned long usec_rem = do_div(ts, USEC_PER_SEC); + unsigned secs = (unsigned long)ts; + const struct trace_entry *ent = iter->ent; + const struct blk_io_trace *t = (const struct blk_io_trace *)ent; + + fill_rwbs(rwbs, t); + + return trace_seq_printf(&iter->seq, + "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", + MAJOR(t->device), MINOR(t->device), iter->cpu, + secs, usec_rem, ent->pid, act, rwbs); +} + +static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, + const char *act) +{ + char rwbs[6]; + fill_rwbs(rwbs, t); + return trace_seq_printf(s, "%3d,%-3d %2s %3s ", + MAJOR(t->device), MINOR(t->device), act, rwbs); +} + +static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) +{ + const char *cmd = trace_find_cmdline(ent->pid); + + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + t_sector(ent), t_sec(ent), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); +} + +static int blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) +{ + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), + t_sec(ent), t_error(ent)); + return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); +} + +static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) +{ + struct blk_io_trace_remap r = { .device = 0, }; + + get_pdu_remap(ent, &r); + return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", + t_sector(ent), + t_sec(ent), MAJOR(r.device), MINOR(r.device), + (unsigned long long)r.sector); +} + +static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid)); +} + +static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "[%s] %llu\n", 
trace_find_cmdline(ent->pid), + get_pdu_int(ent)); +} + +static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) +{ + return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), + get_pdu_int(ent), trace_find_cmdline(ent->pid)); +} + +/* + * struct tracer operations + */ + +static void blk_tracer_print_header(struct seq_file *m) +{ + if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + return; + seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" + "# | | | | | |\n"); +} + +static void blk_tracer_start(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) + if (blk_register_tracepoints()) + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); +} + +static int blk_tracer_init(struct trace_array *tr) +{ + blk_tr = tr; + blk_tracer_start(tr); + mutex_lock(&blk_probe_mutex); + blk_tracer_enabled++; + mutex_unlock(&blk_probe_mutex); + return 0; +} + +static void blk_tracer_stop(struct trace_array *tr) +{ + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); +} + +static void blk_tracer_reset(struct trace_array *tr) +{ + if (!atomic_read(&blk_probes_ref)) + return; + + mutex_lock(&blk_probe_mutex); + blk_tracer_enabled--; + WARN_ON(blk_tracer_enabled < 0); + mutex_unlock(&blk_probe_mutex); + + blk_tracer_stop(tr); +} + +static struct { + const char *act[2]; + int (*print)(struct trace_seq *s, const struct trace_entry *ent); +} what2act[] __read_mostly = { + [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, + [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, + [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, + [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, + [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, + [__BLK_TA_REQUEUE] = 
{{ "R", "requeue" }, blk_log_with_error }, + [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, + [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, + [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, + [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, + [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, + [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, + [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, + [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, + [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, +}; + +static int blk_trace_event_print(struct trace_seq *s, struct trace_entry *ent, + int flags) +{ + const struct blk_io_trace *t = (struct blk_io_trace *)ent; + const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); + int ret; + + if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + ret = trace_seq_printf(s, "Bad pc action %x\n", what); + else { + const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); + if (ret) + ret = what2act[what].print(s, ent); + } + + return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) +{ + const struct blk_io_trace *t; + u16 what; + int ret; + + if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + return TRACE_TYPE_UNHANDLED; + + t = (const struct blk_io_trace *)iter->ent; + what = t->action & ((1 << BLK_TC_SHIFT) - 1); + + if (unlikely(what == 0 || what > ARRAY_SIZE(what2act))) + ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); + else { + const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); + ret = blk_log_action_iter(iter, what2act[what].act[long_act]); + if (ret) + ret = what2act[what].print(&iter->seq, iter->ent); + } + + return ret ? 
TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; +} + +static struct tracer blk_tracer __read_mostly = { + .name = "blk", + .init = blk_tracer_init, + .reset = blk_tracer_reset, + .start = blk_tracer_start, + .stop = blk_tracer_stop, + .print_header = blk_tracer_print_header, + .print_line = blk_tracer_print_line, + .flags = &blk_tracer_flags, +}; + +static struct trace_event trace_blk_event = { + .type = TRACE_BLK, + .trace = blk_trace_event_print, + .latency_trace = blk_trace_event_print, + .raw = trace_nop_print, + .hex = trace_nop_print, + .binary = trace_nop_print, +}; + +static int __init init_blk_tracer(void) +{ + if (!register_ftrace_event(&trace_blk_event)) { + pr_warning("Warning: could not register block events\n"); + return 1; + } + + if (register_tracer(&blk_tracer) != 0) { + pr_warning("Warning: could not register the block tracer\n"); + unregister_ftrace_event(&trace_blk_event); + return 1; + } + + return 0; +} + +device_initcall(init_blk_tracer); + +static int blk_trace_remove_queue(struct request_queue *q) +{ + struct blk_trace *bt; + + bt = xchg(&q->blk_trace, NULL); + if (bt == NULL) + return -EINVAL; + + kfree(bt); + return 0; +} + +/* + * Setup everything required to start tracing + */ +static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) +{ + struct blk_trace *old_bt, *bt = NULL; + int ret; + + ret = -ENOMEM; + bt = kzalloc(sizeof(*bt), GFP_KERNEL); + if (!bt) + goto err; + + bt->dev = dev; + bt->act_mask = (u16)-1; + bt->end_lba = -1ULL; + bt->trace_state = Blktrace_running; + + old_bt = xchg(&q->blk_trace, bt); + if (old_bt != NULL) { + (void)xchg(&q->blk_trace, old_bt); + kfree(bt); + ret = -EBUSY; + } + return 0; +err: + return ret; +} + +/* + * sysfs interface to enable and configure tracing + */ + +static ssize_t sysfs_blk_trace_enable_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev; + ssize_t ret = -ENXIO; + + lock_kernel(); + 
bdev = bdget(part_devt(p)); + if (bdev != NULL) { + struct request_queue *q = bdev_get_queue(bdev); + + if (q != NULL) { + mutex_lock(&bdev->bd_mutex); + ret = sprintf(buf, "%u\n", !!q->blk_trace); + mutex_unlock(&bdev->bd_mutex); + } + + bdput(bdev); + } + + unlock_kernel(); + return ret; +} + +static ssize_t sysfs_blk_trace_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct block_device *bdev; + struct request_queue *q; + struct hd_struct *p; + int value; + ssize_t ret = -ENXIO; + + if (count == 0 || sscanf(buf, "%d", &value) != 1) + goto out; + + lock_kernel(); + p = dev_to_part(dev); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + + mutex_lock(&bdev->bd_mutex); + if (value) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + else + ret = blk_trace_remove_queue(q); + mutex_unlock(&bdev->bd_mutex); + + if (ret == 0) + ret = count; +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); +out: + return ret; +} + +static ssize_t sysfs_blk_trace_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf); +static ssize_t sysfs_blk_trace_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +#define BLK_TRACE_DEVICE_ATTR(_name) \ + DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ + sysfs_blk_trace_attr_show, \ + sysfs_blk_trace_attr_store) + +static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, + sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store); +static BLK_TRACE_DEVICE_ATTR(act_mask); +static BLK_TRACE_DEVICE_ATTR(pid); +static BLK_TRACE_DEVICE_ATTR(start_lba); +static BLK_TRACE_DEVICE_ATTR(end_lba); + +static struct attribute *blk_trace_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_act_mask.attr, + &dev_attr_pid.attr, + &dev_attr_start_lba.attr, + &dev_attr_end_lba.attr, + NULL +}; + +struct attribute_group blk_trace_attr_group = { + .name = "trace", 
+ .attrs = blk_trace_attrs, +}; + +static int blk_str2act_mask(const char *str) +{ + int mask = 0; + char *copy = kstrdup(str, GFP_KERNEL), *s; + + if (copy == NULL) + return -ENOMEM; + + s = strstrip(copy); + + while (1) { + char *sep = strchr(s, ','); + + if (sep != NULL) + *sep = '\0'; + + if (strcasecmp(s, "barrier") == 0) + mask |= BLK_TC_BARRIER; + else if (strcasecmp(s, "complete") == 0) + mask |= BLK_TC_COMPLETE; + else if (strcasecmp(s, "fs") == 0) + mask |= BLK_TC_FS; + else if (strcasecmp(s, "issue") == 0) + mask |= BLK_TC_ISSUE; + else if (strcasecmp(s, "pc") == 0) + mask |= BLK_TC_PC; + else if (strcasecmp(s, "queue") == 0) + mask |= BLK_TC_QUEUE; + else if (strcasecmp(s, "read") == 0) + mask |= BLK_TC_READ; + else if (strcasecmp(s, "requeue") == 0) + mask |= BLK_TC_REQUEUE; + else if (strcasecmp(s, "sync") == 0) + mask |= BLK_TC_SYNC; + else if (strcasecmp(s, "write") == 0) + mask |= BLK_TC_WRITE; + + if (sep == NULL) + break; + + s = sep + 1; + } + kfree(copy); + + return mask; +} + +static ssize_t sysfs_blk_trace_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct request_queue *q; + struct block_device *bdev; + ssize_t ret = -ENXIO; + + lock_kernel(); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + mutex_lock(&bdev->bd_mutex); + if (q->blk_trace == NULL) + ret = sprintf(buf, "disabled\n"); + else if (attr == &dev_attr_act_mask) + ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask); + else if (attr == &dev_attr_pid) + ret = sprintf(buf, "%u\n", q->blk_trace->pid); + else if (attr == &dev_attr_start_lba) + ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + else if (attr == &dev_attr_end_lba) + ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + mutex_unlock(&bdev->bd_mutex); +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); + return ret; +} + +static 
ssize_t sysfs_blk_trace_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct block_device *bdev; + struct request_queue *q; + struct hd_struct *p; + u64 value; + ssize_t ret = -ENXIO; + + if (count == 0) + goto out; + + if (attr == &dev_attr_act_mask) { + if (sscanf(buf, "%llx", &value) != 1) { + /* Assume it is a list of trace category names */ + value = blk_str2act_mask(buf); + if (value < 0) + goto out; + } + } else if (sscanf(buf, "%llu", &value) != 1) + goto out; + + lock_kernel(); + p = dev_to_part(dev); + bdev = bdget(part_devt(p)); + if (bdev == NULL) + goto out_unlock_kernel; + + q = bdev_get_queue(bdev); + if (q == NULL) + goto out_bdput; + + mutex_lock(&bdev->bd_mutex); + ret = 0; + if (q->blk_trace == NULL) + ret = blk_trace_setup_queue(q, bdev->bd_dev); + + if (ret == 0) { + if (attr == &dev_attr_act_mask) + q->blk_trace->act_mask = value; + else if (attr == &dev_attr_pid) + q->blk_trace->pid = value; + else if (attr == &dev_attr_start_lba) + q->blk_trace->start_lba = value; + else if (attr == &dev_attr_end_lba) + q->blk_trace->end_lba = value; + ret = count; + } + mutex_unlock(&bdev->bd_mutex); +out_bdput: + bdput(bdev); +out_unlock_kernel: + unlock_kernel(); +out: + return ret; +} diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 6d720243f5f..01714efdc65 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -268,6 +268,10 @@ ssize_t part_fail_store(struct device *dev, } #endif +#ifdef CONFIG_BLK_DEV_IO_TRACE +extern struct attribute_group blk_trace_attr_group; +#endif + static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); @@ -294,6 +298,9 @@ static struct attribute_group part_attr_group = { static struct attribute_group *part_attr_groups[] = { &part_attr_group, +#ifdef CONFIG_BLK_DEV_IO_TRACE + &blk_trace_attr_group, +#endif NULL }; diff 
--git a/kernel/trace/trace.h b/kernel/trace/trace.h index b96037d970d..e603a291134 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -32,6 +32,7 @@ enum trace_type { TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, + TRACE_BLK, __TRACE_LAST_TYPE, }; -- cgit v1.2.3-70-g09d2 From c4a8e8be2d43cc22b371e8e9c05c253409759d94 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 2 Feb 2009 20:29:21 -0200 Subject: trace: better manage the context info for events Impact: make trace_event more convenient for tracers All tracers (for the moment) that use the struct trace_event want to have the context info printed before their own output: the pid/cmdline, cpu, and timestamp. But some other tracers that want to implement their trace_event callbacks will not necessarily need this information or they may want to format it as they want. This patch adds a new default-enabled trace option: TRACE_ITER_CONTEXT_INFO When disabled through: echo nocontext-info > /debugfs/tracing/trace_options The pid, cpu and timestamp headers will not be printed. IE with the sched_switch tracer with context-info (default): bash-2935 [001] 100.356561: 2935:120:S ==> [001] 0:140:R <idle>-0 [000] 100.412804: 0:140:R + [000] 11:115:S events/0 <idle>-0 [000] 100.412816: 0:140:R ==> [000] 11:115:R events/0 events/0-11 [000] 100.412829: 11:115:S ==> [000] 0:140:R Without context-info: 2935:120:S ==> [001] 0:140:R 0:140:R + [000] 11:115:S events/0 0:140:R ==> [000] 11:115:R events/0 11:115:S ==> [000] 0:140:R A tracer can disable it at runtime by clearing the bit TRACE_ITER_CONTEXT_INFO in trace_flags. The print routines were renamed to trace_print_context and trace_print_lat_context, so that they can be used by tracers if they want to use them for one of the trace_event callbacks.
Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 149 +++++++++++--------------------------------- kernel/trace/trace.h | 7 ++- kernel/trace/trace_output.c | 107 +++++++++++++++++++++++++++++++ kernel/trace/trace_output.h | 3 + 4 files changed, 151 insertions(+), 115 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2f8ac1f008f..5ec49c3c159 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -227,7 +227,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; /** * trace_wake_up - wake up tasks waiting for trace input @@ -285,6 +285,7 @@ static const char *trace_options[] = { "userstacktrace", "sym-userobj", "printk-msg-only", + "context-info", NULL }; @@ -1171,8 +1172,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) } /* Find the next real entry, without updating the iterator itself */ -static struct trace_entry * -find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) +struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, + int *ent_cpu, u64 *ent_ts) { return __find_next_entry(iter, ent_cpu, ent_ts); } @@ -1351,57 +1352,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) seq_puts(m, "\n"); } -static void -lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) -{ - int hardirq, softirq; - char *comm; - - comm = trace_find_cmdline(entry->pid); - - trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); - trace_seq_printf(s, "%3d", cpu); - trace_seq_printf(s, "%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', - ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); - - hardirq = entry->flags & TRACE_FLAG_HARDIRQ; - softirq = entry->flags & TRACE_FLAG_SOFTIRQ; - if (hardirq && softirq) { - trace_seq_putc(s, 'H'); - } else { - if (hardirq) { - trace_seq_putc(s, 'h'); - } else { - if (softirq) - trace_seq_putc(s, 's'); - else - trace_seq_putc(s, '.'); - } - } - - if (entry->preempt_count) - trace_seq_printf(s, "%x", entry->preempt_count); - else - trace_seq_puts(s, "."); -} - -unsigned long preempt_mark_thresh = 100; - -static void -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, - unsigned long rel_usecs) -{ - trace_seq_printf(s, " %4lldus", abs_usecs); - if (rel_usecs > preempt_mark_thresh) - trace_seq_puts(s, "!: "); - else if (rel_usecs > 1) - trace_seq_puts(s, "+: "); - else - trace_seq_puts(s, " : "); -} - static void test_cpu_buff_start(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1419,46 +1369,24 @@ static void test_cpu_buff_start(struct trace_iterator *iter) trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); } -static enum print_line_t -print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) +static enum print_line_t print_lat_fmt(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); - struct trace_entry *next_entry; struct trace_event *event; - unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); struct trace_entry *entry = iter->ent; - unsigned long abs_usecs; - unsigned long rel_usecs; - u64 next_ts; - char *comm; int ret; test_cpu_buff_start(iter); - next_entry = find_next_entry(iter, NULL, &next_ts); - if (!next_entry) - next_ts = iter->ts; - rel_usecs = ns2usecs(next_ts - iter->ts); - abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); - - if (verbose) { - comm = trace_find_cmdline(entry->pid); - trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]" - " %ld.%03ldms (+%ld.%03ldms): ", - comm, - entry->pid, cpu, entry->flags, - entry->preempt_count, 
trace_idx, - ns2usecs(iter->ts), - abs_usecs/1000, - abs_usecs % 1000, rel_usecs/1000, - rel_usecs % 1000); - } else { - lat_print_generic(s, entry, cpu); - lat_print_timestamp(s, abs_usecs, rel_usecs); + event = ftrace_find_event(entry->type); + + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_print_lat_context(iter); + if (ret) + return ret; } - event = ftrace_find_event(entry->type); if (event && event->latency_trace) { ret = event->latency_trace(s, entry, sym_flags); if (ret) @@ -1476,33 +1404,20 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); struct trace_entry *entry; struct trace_event *event; - unsigned long usec_rem; - unsigned long long t; - unsigned long secs; - char *comm; int ret; entry = iter->ent; test_cpu_buff_start(iter); - comm = trace_find_cmdline(iter->ent->pid); - - t = ns2usecs(iter->ts); - usec_rem = do_div(t, 1000000ULL); - secs = (unsigned long)t; + event = ftrace_find_event(entry->type); - ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "[%03d] ", iter->cpu); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_print_context(iter); + if (ret) + return ret; + } - event = ftrace_find_event(entry->type); if (event && event->trace) { ret = event->trace(s, entry, sym_flags); if (ret) @@ -1525,10 +1440,12 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) entry = iter->ent; - ret = trace_seq_printf(s, "%d %d %llu ", - entry->pid, iter->cpu, iter->ts); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + ret = trace_seq_printf(s, "%d %d %llu ", + entry->pid, iter->cpu, iter->ts); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } event = 
ftrace_find_event(entry->type); if (event && event->raw) { @@ -1553,9 +1470,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) entry = iter->ent; - SEQ_PUT_HEX_FIELD_RET(s, entry->pid); - SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); - SEQ_PUT_HEX_FIELD_RET(s, iter->ts); + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + SEQ_PUT_HEX_FIELD_RET(s, entry->pid); + SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); + SEQ_PUT_HEX_FIELD_RET(s, iter->ts); + } event = ftrace_find_event(entry->type); if (event && event->hex) @@ -1575,7 +1494,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, field->buf); + ret = trace_seq_printf(s, "%s", field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1590,9 +1509,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) entry = iter->ent; - SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, entry->cpu); - SEQ_PUT_FIELD_RET(s, iter->ts); + if (trace_flags & TRACE_ITER_CONTEXT_INFO) { + SEQ_PUT_FIELD_RET(s, entry->pid); + SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, iter->ts); + } event = ftrace_find_event(entry->type); if (event && event->binary) @@ -1643,7 +1564,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return print_raw_fmt(iter); if (iter->iter_flags & TRACE_FILE_LAT_FMT) - return print_lat_fmt(iter, iter->idx, iter->cpu); + return print_lat_fmt(iter); return print_trace_fmt(iter); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e603a291134..f0c7a0f08ca 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -405,6 +405,10 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); + +struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, + int *ent_cpu, u64 *ent_ts); + void tracing_generic_entry_update(struct trace_entry *entry, 
unsigned long flags, int pc); @@ -591,7 +595,8 @@ enum trace_iterator_flags { TRACE_ITER_ANNOTATE = 0x2000, TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, - TRACE_ITER_PRINTK_MSGONLY = 0x10000 + TRACE_ITER_PRINTK_MSGONLY = 0x10000, + TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */ }; /* diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 1a4e144a9f8..a5752d4d3c3 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -286,6 +286,113 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static void +lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +{ + int hardirq, softirq; + char *comm; + + comm = trace_find_cmdline(entry->pid); + + trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%3d", cpu); + trace_seq_printf(s, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) { + trace_seq_putc(s, 'H'); + } else { + if (hardirq) { + trace_seq_putc(s, 'h'); + } else { + if (softirq) + trace_seq_putc(s, 's'); + else + trace_seq_putc(s, '.'); + } + } + + if (entry->preempt_count) + trace_seq_printf(s, "%x", entry->preempt_count); + else + trace_seq_puts(s, "."); +} + +static unsigned long preempt_mark_thresh = 100; + +static void +lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, + unsigned long rel_usecs) +{ + trace_seq_printf(s, " %4lldus", abs_usecs); + if (rel_usecs > preempt_mark_thresh) + trace_seq_puts(s, "!: "); + else if (rel_usecs > 1) + trace_seq_puts(s, "+: "); + else + trace_seq_puts(s, " : "); +} + +int trace_print_context(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + char *comm = trace_find_cmdline(entry->pid); + unsigned long long t = ns2usecs(iter->ts); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); + unsigned long secs = (unsigned long)t; + + if (!trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid)) + goto partial; + if (!trace_seq_printf(s, "[%03d] ", entry->cpu)) + goto partial; + if (!trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem)) + goto partial; + + return 0; + +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +int trace_print_lat_context(struct trace_iterator *iter) +{ + u64 next_ts; + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent, + *next_entry = trace_find_next_entry(iter, NULL, + &next_ts); + unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); + unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); + unsigned long rel_usecs; + + if (!next_entry) + next_ts = iter->ts; + rel_usecs = ns2usecs(next_ts - iter->ts); + + if (verbose) { + char *comm = trace_find_cmdline(entry->pid); + trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" + " %ld.%03ldms 
(+%ld.%03ldms): ", + comm, + entry->pid, entry->cpu, entry->flags, + entry->preempt_count, iter->idx, + ns2usecs(iter->ts), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, + rel_usecs % 1000); + } else { + lat_print_generic(s, entry, entry->cpu); + lat_print_timestamp(s, abs_usecs, rel_usecs); + } + + return 0; +} + static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; static int task_state_char(unsigned long state) diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 1cbab5e3dc9..ec2ed90f10f 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -33,6 +33,9 @@ int seq_print_userip_objs(const struct userstack_entry *entry, int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, unsigned long ip, unsigned long sym_flags); +int trace_print_context(struct trace_iterator *iter); +int trace_print_lat_context(struct trace_iterator *iter); + struct trace_event *ftrace_find_event(int type); int register_ftrace_event(struct trace_event *event); int unregister_ftrace_event(struct trace_event *event); -- cgit v1.2.3-70-g09d2 From 7be421510b91491d5aa5a29fa1005712039b95af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 01:13:37 -0500 Subject: trace: Remove unused trace_array_cpu parameter Impact: cleanup Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- block/blktrace.c | 2 +- kernel/trace/trace.c | 47 +++++++++++++++------------------------ kernel/trace/trace.h | 4 ---- kernel/trace/trace_functions.c | 8 +++---- kernel/trace/trace_irqsoff.c | 10 ++++----- kernel/trace/trace_sched_switch.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 12 +++++----- 7 files changed, 35 insertions(+), 52 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/block/blktrace.c b/block/blktrace.c index 1ebd068061e..d9d7146ee02 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -245,7 +245,7 @@ record_it: if (pid != 0 && !(blk_tracer_flags.val 
& TRACE_BLK_OPT_CLASSIC) && (trace_flags & TRACE_ITER_STACKTRACE) != 0) - __trace_stack(blk_tr, NULL, flags, 5, pc); + __trace_stack(blk_tr, flags, 5, pc); trace_wake_up(); return; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a5e4c0af9bb..1d4ff568cc4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -776,7 +776,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } void -trace_function(struct trace_array *tr, struct trace_array_cpu *data, +trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { @@ -802,7 +802,6 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static void __trace_graph_entry(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ent *trace, unsigned long flags, int pc) @@ -826,7 +825,6 @@ static void __trace_graph_entry(struct trace_array *tr, } static void __trace_graph_return(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) @@ -856,11 +854,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, int pc) { if (likely(!atomic_read(&data->disabled))) - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); } static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { @@ -891,27 +888,24 @@ static void __ftrace_trace_stack(struct trace_array *tr, } static void ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(tr, data, flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr, data, 
flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, int pc) + unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; @@ -942,20 +936,17 @@ static void ftrace_trace_userstack(struct trace_array *tr, #endif } -void __trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags) +void __trace_userstack(struct trace_array *tr, unsigned long flags) { - ftrace_trace_userstack(tr, data, flags, preempt_count()); + ftrace_trace_userstack(tr, flags, preempt_count()); } static void -ftrace_trace_special(void *__tr, void *__data, +ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { struct ring_buffer_event *event; - struct trace_array_cpu *data = __data; struct trace_array *tr = __tr; struct special_entry *entry; unsigned long irq_flags; @@ -971,8 +962,8 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg2 = arg2; entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, irq_flags, 4, pc); - ftrace_trace_userstack(tr, data, irq_flags, pc); + ftrace_trace_stack(tr, irq_flags, 4, pc); + ftrace_trace_userstack(tr, irq_flags, pc); trace_wake_up(); } @@ -981,12 +972,11 @@ void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { - ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); + ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); } void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc) @@ -1010,13 +1000,12 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - 
ftrace_trace_stack(tr, data, flags, 5, pc); - ftrace_trace_userstack(tr, data, flags, pc); + ftrace_trace_stack(tr, flags, 5, pc); + ftrace_trace_userstack(tr, flags, pc); } void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *curr, unsigned long flags, int pc) @@ -1040,8 +1029,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, flags, 6, pc); - ftrace_trace_userstack(tr, data, flags, pc); + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); trace_wake_up(); } @@ -1064,7 +1053,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) data = tr->data[cpu]; if (likely(atomic_inc_return(&data->disabled) == 1)) - ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); + ftrace_trace_special(tr, arg1, arg2, arg3, pc); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1092,7 +1081,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_entry(tr, data, trace, flags, pc); + __trace_graph_entry(tr, trace, flags, pc); } /* Only do the atomic if it is not already set */ if (!test_tsk_trace_graph(current)) @@ -1118,7 +1107,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_return(tr, data, trace, flags, pc); + __trace_graph_return(tr, trace, flags, pc); } if (!trace->depth) clear_tsk_trace_graph(current); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f0c7a0f08ca..df627a94869 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -419,14 +419,12 @@ void ftrace(struct trace_array *tr, unsigned long parent_ip, unsigned long 
flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *cur, unsigned long flags, int pc); @@ -436,7 +434,6 @@ void trace_special(struct trace_array *tr, unsigned long arg2, unsigned long arg3, int pc); void trace_function(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); @@ -462,7 +459,6 @@ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b3a320f8aba..d067cea2ccc 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -78,7 +78,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); ftrace_preempt_enable(resched); @@ -108,7 +108,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (likely(disabled == 1)) { pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); } atomic_dec(&data->disabled); @@ -139,7 +139,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) if (likely(disabled == 1)) { pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); /* * skip over 5 funcs: * __ftrace_trace_stack, @@ -148,7 +148,7 @@ 
function_stack_trace_call(unsigned long ip, unsigned long parent_ip) * ftrace_list_func * ftrace_call */ - __trace_stack(tr, data, flags, 5, pc); + __trace_stack(tr, flags, 5, pc); } atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ed344b022a1..c6b442d88de 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); atomic_dec(&data->disabled); } @@ -153,7 +153,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); + trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); latency = nsecs_to_usecs(delta); @@ -177,7 +177,7 @@ out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); tracing_reset(tr, cpu); - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); + trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -210,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -244,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? 
: ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index df175cb4564..c4f9add5ec9 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, data = ctx_trace->data[cpu]; if (likely(!atomic_read(&data->disabled))) - tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); + tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); local_irq_restore(flags); } @@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) data = ctx_trace->data[cpu]; if (likely(!atomic_read(&data->disabled))) - tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, + tracing_sched_wakeup_trace(ctx_trace, wakee, current, flags, pc); local_irq_restore(flags); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index a48c9b4b0c8..96d71648589 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -72,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) if (task_cpu(wakeup_task) != cpu) goto unlock; - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); unlock: __raw_spin_unlock(&wakeup_lock); @@ -152,8 +152,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); - tracing_sched_switch_trace(wakeup_trace, data, prev, next, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); /* * usecs conversion is slow so we try to delay the conversion @@ -254,10 +254,8 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) data = 
wakeup_trace->data[wakeup_cpu]; data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, data, p, current, - flags, pc); - trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, - flags, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: __raw_spin_unlock(&wakeup_lock); -- cgit v1.2.3-70-g09d2 From 51a763dd84253bab1d0a1e68e11a7753d1b702ca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 16:14:13 -0200 Subject: tracing: Introduce trace_buffer_{lock_reserve,unlock_commit} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: new API These new functions do what previously was being open coded, reducing the number of details ftrace plugin writers have to worry about. It also standardizes the handling of stacktrace, userstacktrace and other trace options we may introduce in the future. With this patch, for instance, the blk tracer (and some others already in the tree) can use the "userstacktrace" /d/tracing/trace_options facility. 
$ codiff /tmp/vmlinux.before /tmp/vmlinux.after linux-2.6-tip/kernel/trace/trace.c: trace_vprintk | -5 trace_graph_return | -22 trace_graph_entry | -26 trace_function | -45 __ftrace_trace_stack | -27 ftrace_trace_userstack | -29 tracing_sched_switch_trace | -66 tracing_stop | +1 trace_seq_to_user | -1 ftrace_trace_special | -63 ftrace_special | +1 tracing_sched_wakeup_trace | -70 tracing_reset_online_cpus | -1 13 functions changed, 2 bytes added, 355 bytes removed, diff: -353 linux-2.6-tip/block/blktrace.c: __blk_add_trace | -58 1 function changed, 58 bytes removed, diff: -58 linux-2.6-tip/kernel/trace/trace.c: trace_buffer_lock_reserve | +88 trace_buffer_unlock_commit | +86 2 functions changed, 174 bytes added, diff: +174 /tmp/vmlinux.after: 16 functions changed, 176 bytes added, 413 bytes removed, diff: -237 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- block/blktrace.c | 21 +++------ kernel/trace/kmemtrace.c | 19 +++----- kernel/trace/trace.c | 94 ++++++++++++++++++++++------------------ kernel/trace/trace.h | 11 +++++ kernel/trace/trace_boot.c | 20 +++------ kernel/trace/trace_branch.c | 7 ++- kernel/trace/trace_hw_branches.c | 7 ++- kernel/trace/trace_mmiotrace.c | 20 ++++----- kernel/trace/trace_power.c | 20 +++------ 9 files changed, 102 insertions(+), 117 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/block/blktrace.c b/block/blktrace.c index 8e52f24cc8f..834cd84037b 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -187,19 +187,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, cpu = raw_smp_processor_id(); if (blk_tr) { - struct trace_entry *ent; tracing_record_cmdline(current); - event = ring_buffer_lock_reserve(blk_tr->buffer, - sizeof(*t) + pdu_len); + pc = preempt_count(); + event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + sizeof(*t) + pdu_len, + 0, pc); if (!event) return; - - ent = ring_buffer_event_data(event); - t = (struct 
blk_io_trace *)ent; - pc = preempt_count(); - tracing_generic_entry_update(ent, 0, pc); - ent->type = TRACE_BLK; + t = ring_buffer_event_data(event); goto record_it; } @@ -241,12 +237,7 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tr) { - ring_buffer_unlock_commit(blk_tr->buffer, event); - if (pid != 0 && - !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) && - (trace_flags & TRACE_ITER_STACKTRACE) != 0) - __trace_stack(blk_tr, 0, 5, pc); - trace_wake_up(); + trace_buffer_unlock_commit(blk_tr, event, 0, pc); return; } } diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 256749d1032..ae201b3eda8 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -276,13 +276,12 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC, + sizeof(*entry), 0, 0); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_KMEM_ALLOC; entry->call_site = call_site; entry->ptr = ptr; entry->bytes_req = bytes_req; @@ -290,9 +289,7 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, 0); } EXPORT_SYMBOL(kmemtrace_mark_alloc_node); @@ -307,20 +304,16 @@ void kmemtrace_mark_free(enum kmemtrace_type_id type_id, if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE, + sizeof(*entry), 0, 0); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - - entry->ent.type = TRACE_KMEM_FREE; entry->type_id = type_id; entry->call_site = call_site; entry->ptr = ptr; - 
ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, 0); } EXPORT_SYMBOL(kmemtrace_mark_free); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index eb453a238a6..8fad3776e84 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -776,6 +776,39 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, int pc) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(tr->buffer, len); + if (event != NULL) { + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, flags, pc); + ent->type = type; + } + + return event; +} +static void ftrace_trace_stack(struct trace_array *tr, + unsigned long flags, int skip, int pc); +static void ftrace_trace_userstack(struct trace_array *tr, + unsigned long flags, int pc); + +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + ring_buffer_unlock_commit(tr->buffer, event); + + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); + trace_wake_up(); +} + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, @@ -788,12 +821,11 @@ trace_function(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), + flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; ring_buffer_unlock_commit(tr->buffer, event); @@ -811,12 +843,11 @@ static void 
__trace_graph_entry(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_ENT; entry->graph_ent = *trace; ring_buffer_unlock_commit(global_trace.buffer, event); } @@ -832,12 +863,11 @@ static void __trace_graph_return(struct trace_array *tr, if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_RET; entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event); } @@ -861,13 +891,11 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct stack_entry *entry; struct stack_trace trace; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_STACK; - memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; @@ -908,12 +936,11 @@ static void ftrace_trace_userstack(struct trace_array *tr, if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = 
TRACE_USER_STACK; memset(&entry->caller, 0, sizeof(entry->caller)); @@ -941,20 +968,15 @@ ftrace_trace_special(void *__tr, struct trace_array *tr = __tr; struct special_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, + sizeof(*entry), 0, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, pc); - entry->ent.type = TRACE_SPECIAL; entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, 0, 4, pc); - ftrace_trace_userstack(tr, 0, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void @@ -973,12 +995,11 @@ tracing_sched_switch_trace(struct trace_array *tr, struct ring_buffer_event *event; struct ctx_switch_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_CTX, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_CTX; entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; entry->prev_state = prev->state; @@ -986,9 +1007,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, flags, 5, pc); - ftrace_trace_userstack(tr, flags, pc); + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1000,12 +1019,11 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct ring_buffer_event *event; struct ctx_switch_entry *entry; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_WAKE, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 
flags, pc); - entry->ent.type = TRACE_WAKE; entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; entry->prev_state = curr->state; @@ -1013,11 +1031,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event); - ftrace_trace_stack(tr, flags, 6, pc); - ftrace_trace_userstack(tr, flags, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -2825,12 +2839,10 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = ring_buffer_lock_reserve(tr->buffer, size); + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, irq_flags, pc); - entry->ent.type = TRACE_PRINT; entry->ip = ip; entry->depth = depth; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index df627a94869..e03f157c772 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -403,6 +403,17 @@ int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); +struct ring_buffer_event; + +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, + int pc); +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 4e08debf662..7a30fc4c364 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -143,17 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t 
fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_CALL; entry->boot_call = *bt; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -170,17 +166,13 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_RET; entry->boot_ret = *bt; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 770e52acfc1..48b2196abe3 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -52,14 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + pc = preempt_count(); + event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, + sizeof(*entry), flags, pc); if (!event) goto out; - pc = preempt_count(); entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_BRANCH; /* Strip off the path, only save the file */ p = f->file + strlen(f->file); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 
e720c001db2..2aa1c9f4c7d 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -189,16 +189,15 @@ void trace_hw_branch(u64 from, u64 to) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, from); - entry->ent.type = TRACE_HW_BRANCHES; entry->ent.cpu = cpu; entry->from = from; entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event); + trace_buffer_unlock_commit(tr, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 104ddebc11d..c401b908e80 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,19 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; + int pc = preempt_count(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, + sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); return; } entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, preempt_count()); - entry->ent.type = TRACE_MMIO_RW; entry->rw = *rw; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -335,19 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; + int pc = preempt_count(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, + sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); return; } entry = 
ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, preempt_count()); - entry->ent.type = TRACE_MMIO_MAP; entry->map = *map; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index 3b1a292d12d..bfc21f8079a 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -124,17 +124,13 @@ void trace_power_end(struct power_trace *it) it->end = ktime_get(); data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_POWER, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -159,17 +155,13 @@ void trace_power_mark(struct power_trace *it, unsigned int type, it->end = it->stamp; data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + event = trace_buffer_lock_reserve(tr, TRACE_POWER, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } -- cgit v1.2.3-70-g09d2 From b6f11df26fdc28324cf9c9e3b77f2dc985c1bb13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 18:02:00 -0200 Subject: trace: Call tracing_reset_online_cpus before tracer->init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Impact: cleanup To make it easy for ftrace plugin writers, as this was open coded in the existing plugins Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- block/blktrace.c | 2 -- kernel/trace/trace.c | 8 +++++++- kernel/trace/trace.h | 1 + kernel/trace/trace_branch.c | 1 - kernel/trace/trace_functions.c | 17 +++-------------- kernel/trace/trace_functions_graph.c | 1 - kernel/trace/trace_hw_branches.c | 1 - kernel/trace/trace_nop.c | 1 - kernel/trace/trace_sched_switch.c | 8 +------- kernel/trace/trace_selftest.c | 18 +++++++++--------- kernel/trace/trace_sysprof.c | 14 ++++---------- 11 files changed, 25 insertions(+), 47 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/block/blktrace.c b/block/blktrace.c index 834cd84037b..ca6d32061e4 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -1086,8 +1086,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { - tracing_reset_online_cpus(tr); - mutex_lock(&blk_probe_mutex); if (atomic_add_return(1, &blk_probes_ref) == 1) if (blk_register_tracepoints()) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8fad3776e84..ef4dbac9556 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2171,6 +2171,12 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +int tracer_init(struct tracer *t, struct trace_array *tr) +{ + tracing_reset_online_cpus(tr); + return t->init(tr); +} + static int tracing_set_tracer(const char *buf) { struct trace_array *tr = &global_trace; @@ -2195,7 +2201,7 @@ static int tracing_set_tracer(const char *buf) current_trace = t; if (t->init) { - ret = t->init(tr); + ret = tracer_init(t, tr); if (ret) goto out; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e03f157c772..f2742fb1575 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,7 @@ struct 
trace_iterator { cpumask_var_t started; }; +int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 48b2196abe3..f8ae2c50e01 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -131,7 +131,6 @@ static void stop_branch_trace(struct trace_array *tr) static int branch_trace_init(struct trace_array *tr) { - tracing_reset_online_cpus(tr); start_branch_trace(tr); return 0; } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d067cea2ccc..36bf9568ccd 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -24,32 +24,21 @@ static struct trace_array *func_trace; static void tracing_start_function_trace(void); static void tracing_stop_function_trace(void); -static void start_function_trace(struct trace_array *tr) +static int function_trace_init(struct trace_array *tr) { func_trace = tr; tr->cpu = get_cpu(); - tracing_reset_online_cpus(tr); put_cpu(); tracing_start_cmdline_record(); tracing_start_function_trace(); -} - -static void stop_function_trace(struct trace_array *tr) -{ - tracing_stop_function_trace(); - tracing_stop_cmdline_record(); -} - -static int function_trace_init(struct trace_array *tr) -{ - start_function_trace(tr); return 0; } static void function_trace_reset(struct trace_array *tr) { - stop_function_trace(tr); + tracing_stop_function_trace(); + tracing_stop_cmdline_record(); } static void function_trace_start(struct trace_array *tr) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c97594d826b..222f97d336a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -56,7 +56,6 @@ static int graph_trace_init(struct trace_array *tr) &trace_graph_entry); if (ret) return ret; - tracing_reset_online_cpus(tr); 
tracing_start_cmdline_record(); return 0; diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 2aa1c9f4c7d..ca4bbcfb9e2 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -132,7 +132,6 @@ static int bts_trace_init(struct trace_array *tr) hw_branch_trace = tr; register_hotcpu_notifier(&bts_hotcpu_notifier); - tracing_reset_online_cpus(tr); bts_trace_start(tr); return 0; diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 087b6cbf4ea..9aa84bde23c 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -48,7 +48,6 @@ static void stop_nop_trace(struct trace_array *tr) static int nop_trace_init(struct trace_array *tr) { ctx_trace = tr; - tracing_reset_online_cpus(tr); start_nop_trace(tr); return 0; } diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index c4f9add5ec9..30e14fe8589 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } -static void start_sched_trace(struct trace_array *tr) -{ - tracing_reset_online_cpus(tr); - tracing_start_sched_switch_record(); -} - static void stop_sched_trace(struct trace_array *tr) { tracing_stop_sched_switch_record(); @@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr) static int sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; - start_sched_trace(tr); + tracing_start_sched_switch_record(); return 0; } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 5013812578b..445700e51f6 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -115,7 +115,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { 
warn_failed_init_tracer(trace, ret); goto out; @@ -189,7 +189,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; @@ -236,7 +236,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -290,7 +290,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -344,7 +344,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; @@ -476,7 +476,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -537,7 +537,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -569,7 +569,7 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return 0; @@ -596,7 +596,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { 
warn_failed_init_tracer(trace, ret); return ret; diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index eaca5ad803f..84ca9d81e74 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -226,15 +226,6 @@ static void stop_stack_timers(void) stop_stack_timer(cpu); } -static void start_stack_trace(struct trace_array *tr) -{ - mutex_lock(&sample_timer_lock); - tracing_reset_online_cpus(tr); - start_stack_timers(); - tracer_enabled = 1; - mutex_unlock(&sample_timer_lock); -} - static void stop_stack_trace(struct trace_array *tr) { mutex_lock(&sample_timer_lock); @@ -247,7 +238,10 @@ static int stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; - start_stack_trace(tr); + mutex_lock(&sample_timer_lock); + start_stack_timers(); + tracer_enabled = 1; + mutex_unlock(&sample_timer_lock); return 0; } -- cgit v1.2.3-70-g09d2 From 1830b52d0de8c60c4f5dfbac134aa8f69d815801 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2009 19:38:43 -0500 Subject: trace: remove deprecated entry->cpu Impact: fix to prevent developers from using entry->cpu With the new ring buffer infrastructure, the cpu for the entry is implicit in which CPU buffer it is on. The original code used to record the current cpu into the generic entry header, which could be retrieved by entry->cpu. When the ring buffer was introduced, the users were converted to use the cpu number of the cpu ring buffer in use (this was passed to the tracers by the iterator: iter->cpu). Unfortunately, the cpu item in the entry structure was never removed. This allowed developers to use it instead of the proper iter->cpu, unknowingly using an uninitialized variable. This was not the fault of the developers, since it would seem like the logical place to retrieve the cpu identifier. This patch removes the cpu item from the entry structure and fixes all the users that should have been using iter->cpu.
Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 1 - kernel/trace/trace_hw_branches.c | 3 +-- kernel/trace/trace_output.c | 6 +++--- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd51cf0b94c..bd4d9f8818f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1531,7 +1531,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) if (trace_flags & TRACE_ITER_CONTEXT_INFO) { SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, entry->cpu); + SEQ_PUT_FIELD_RET(s, iter->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f0c7a0f08ca..5efc4c707f7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -45,7 +45,6 @@ enum trace_type { */ struct trace_entry { unsigned char type; - unsigned char cpu; unsigned char flags; unsigned char preempt_count; int pid; diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index fff3545fc86..549238a9b13 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -159,7 +159,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) trace_assign_type(it, entry); if (entry->type == TRACE_HW_BRANCHES) { - if (trace_seq_printf(seq, "%4d ", entry->cpu) && + if (trace_seq_printf(seq, "%4d ", iter->cpu) && seq_print_ip_sym(seq, it->to, symflags) && trace_seq_printf(seq, "\t <- ") && seq_print_ip_sym(seq, it->from, symflags) && @@ -195,7 +195,6 @@ void trace_hw_branch(u64 from, u64 to) entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, from); entry->ent.type = TRACE_HW_BRANCHES; - entry->ent.cpu = cpu; entry->from = from; entry->to = to; ring_buffer_unlock_commit(tr->buffer, event, irq2); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b7380eee9fa..463a310b1d3 100644 --- 
a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -333,7 +333,7 @@ int trace_print_context(struct trace_iterator *iter) unsigned long secs = (unsigned long)t; return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", - comm, entry->pid, entry->cpu, secs, usec_rem); + comm, entry->pid, iter->cpu, secs, usec_rem); } int trace_print_lat_context(struct trace_iterator *iter) @@ -356,7 +356,7 @@ int trace_print_lat_context(struct trace_iterator *iter) char *comm = trace_find_cmdline(entry->pid); ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, - entry->pid, entry->cpu, entry->flags, + entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx, ns2usecs(iter->ts), abs_usecs / USEC_PER_MSEC, @@ -364,7 +364,7 @@ int trace_print_lat_context(struct trace_iterator *iter) rel_usecs / USEC_PER_MSEC, rel_usecs % USEC_PER_MSEC); } else { - ret = lat_print_generic(s, entry, entry->cpu); + ret = lat_print_generic(s, entry, iter->cpu); if (ret) ret = lat_print_timestamp(s, abs_usecs, rel_usecs); } -- cgit v1.2.3-70-g09d2 From 57794a9d48b63e34acbe63282628c9f029603308 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Fri, 6 Feb 2009 17:33:27 +0800 Subject: trace: trivial fixes in comment typos. Impact: clean up Fixed several typos in the comments. 
Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 6 +++--- kernel/trace/trace.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7840e718c6c..5e302d636fc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -140,7 +140,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** - * ftrace_make_nop - convert code into top + * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization * @rec: the mcount call site record * @addr: the address that the call site should be calling diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 68610031780..1796e018fbf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -465,7 +465,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * it is not enabled then do nothing. * * If this record is not to be traced and - * it is enabled then disabled it. + * it is enabled then disable it. * */ if (rec->flags & FTRACE_FL_NOTRACE) { @@ -485,7 +485,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* Record is not filtered and is not enabled do nothing */ + /* Record is not filtered or enabled, do nothing */ if (!fl) return 0; @@ -507,7 +507,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) } else { - /* if record is not enabled do nothing */ + /* if record is not enabled, do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5efc4c707f7..f92aba52a89 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -616,12 +616,12 @@ extern struct tracer nop_trace; * preempt_enable (after a disable), a schedule might take place * causing an infinite recursion. 
* - * To prevent this, we read the need_recshed flag before + * To prevent this, we read the need_resched flag before * disabling preemption. When we want to enable preemption we * check the flag, if it is set, then we call preempt_enable_no_resched. * Otherwise, we call preempt_enable. * - * The rational for doing the above is that if need resched is set + * The rational for doing the above is that if need_resched is set * and we have yet to reschedule, we are either in an atomic location * (where we do not need to check for scheduling) or we are inside * the scheduler and do not want to resched. @@ -642,7 +642,7 @@ static inline int ftrace_preempt_disable(void) * * This is a scheduler safe way to enable preemption and not miss * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we were either inside an atomic or + * If resched is set, then we are either inside an atomic or * are inside the scheduler (we would have already scheduled * otherwise). In this case, we do not want to call normal * preempt_enable, but preempt_enable_no_resched instead. -- cgit v1.2.3-70-g09d2 From 7447dce96f2233d250bc39a4a10a42f7c3dd46fc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 21:33:57 +0100 Subject: tracing/function-graph-tracer: provide a selftest for the function graph tracer Making it more easy to do a basic regression test for this tracer. 
Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 ++ kernel/trace/trace_functions_graph.c | 3 +++ kernel/trace/trace_selftest.c | 50 ++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b9838f4a692..a011ec06222 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -500,6 +500,8 @@ extern int DYN_FTRACE_TEST_NAME(void); #ifdef CONFIG_FTRACE_STARTUP_TEST extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_function_graph(struct tracer *trace, + struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 222f97d336a..88f8d9d80a9 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -750,6 +750,9 @@ static struct tracer graph_trace __read_mostly = { .print_line = print_graph_function, .print_header = print_graph_headers, .flags = &tracer_flags, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_function_graph, +#endif }; static __init int init_graph_trace(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 445700e51f6..0c9aa1457e5 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -13,6 +13,8 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_PRINT: case TRACE_SPECIAL: case TRACE_BRANCH: + case TRACE_GRAPH_ENT: + case TRACE_GRAPH_RET: return 1; } return 0; @@ -227,6 +229,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) } #endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Pretty much the same than for the 
function tracer from which the selftest + * has been borrowed. + */ +int +trace_selftest_startup_function_graph(struct tracer *trace, + struct trace_array *tr) +{ + int ret; + unsigned long count; + + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } + + /* Sleep for a 1/10 of a second */ + msleep(100); + + tracing_stop(); + + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + + trace->reset(tr); + tracing_start(); + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; + goto out; + } + + /* Don't test dynamic tracing, the function tracer already did */ + +out: + /* Stop it if we failed */ + if (ret) + ftrace_graph_stop(); + + return ret; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + #ifdef CONFIG_IRQSOFF_TRACER int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) -- cgit v1.2.3-70-g09d2 From 1292211058aaf872eeb2a0e2677d237916b4501f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 22:16:12 +0100 Subject: tracing/power: move the power trace headers to a dedicated file Impact: cleanup Move the power tracer headers to trace/power.h to keep ftrace.h and power bits more easy to maintain as separated topics. 
Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- arch/x86/kernel/process.c | 2 +- include/linux/ftrace.h | 30 ------------------------- include/trace/power.h | 35 ++++++++++++++++++++++++++++++ kernel/trace/trace.h | 1 + kernel/trace/trace_power.c | 2 +- 6 files changed, 39 insertions(+), 33 deletions(-) create mode 100644 include/trace/power.h (limited to 'kernel/trace/trace.h') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c..7ed925edf4d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..026819ffcb0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5e302d636fc..106b7909d50 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -339,36 +339,6 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { -#ifdef CONFIG_POWER_TRACER - ktime_t stamp; - ktime_t end; - int type; - int state; -#endif -}; - -#ifdef CONFIG_POWER_TRACER -extern void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state); -extern void trace_power_end(struct power_trace *it); -#else -static inline void trace_power_start(struct power_trace *it, unsigned int type, - unsigned int state) { } -static inline void trace_power_mark(struct power_trace *it, unsigned int type, - unsigned int state) { } 
-static inline void trace_power_end(struct power_trace *it) { } -#endif - - /* * Structure that defines an entry function trace. */ diff --git a/include/trace/power.h b/include/trace/power.h new file mode 100644 index 00000000000..c7cefbcdaea --- /dev/null +++ b/include/trace/power.h @@ -0,0 +1,35 @@ +#ifndef _TRACE_POWER_H +#define _TRACE_POWER_H + +#include + +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { +#ifdef CONFIG_POWER_TRACER + ktime_t stamp; + ktime_t end; + int type; + int state; +#endif +}; + +#ifdef CONFIG_POWER_TRACER +extern void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_end(struct power_trace *it); +#else +static inline void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_end(struct power_trace *it) { } +#endif + +#endif /* _TRACE_POWER_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a011ec06222..1ecfb9d2b36 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -10,6 +10,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index bfc21f8079a..b1d0d087d3a 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From b91facc367366b3f71375f337eb5997ec9ab4e69 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Feb 2009 18:30:44 +0100 Subject: tracing/function-graph-tracer: handle the leaf functions from trace_pipe When one cats the trace file, the leaf functions are printed without brackets: function(); whereas in the 
trace_pipe file we'll see the following: function() { } This is because the ring_buffer handling is not the same between those two files. On the trace file, when an entry is printed, the iterator has advanced, so we can check the next entry. There is no iterator with trace_pipe: the current entry to print has been peeked and not consumed. So checking the next entry will still return the current one as long as we don't consume it. This patch introduces a new value for the output callbacks to ask the tracing core to not consume the current entry after printing it. We need it because we will have to consume the current entry ourselves to check the next one. Now trace_pipe is able to handle leaf functions properly. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 +-- kernel/trace/trace.h | 7 +++--- kernel/trace/trace_functions_graph.c | 48 ++++++++++++++++++++++-------------- 3 files changed, 36 insertions(+), 23 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 93040f1bef1..5b1e9a9e990 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2468,8 +2468,8 @@ waitagain: iter->seq.len = len; break; } - - trace_consume(iter); + if (ret != TRACE_TYPE_NO_CONSUME) + trace_consume(iter); if (iter->seq.len >= cnt) break; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1ecfb9d2b36..7b0518adf6d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -63,13 +63,13 @@ struct ftrace_entry { /* Function call entry */ struct ftrace_graph_ent_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ent graph_ent; }; /* Function return entry */ struct ftrace_graph_ret_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -309,7 +309,8 @@ extern void __ftrace_bad_type(void); enum print_line_t { TRACE_TYPE_PARTIAL_LINE = 0, /* Retry
after flushing the seq */ TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 88f8d9d80a9..782ec0fdf45 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -212,8 +212,8 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu) return ret; } -static bool -trace_branch_is_leaf(struct trace_iterator *iter, +static struct ftrace_graph_ret_entry * +get_return_for_leaf(struct trace_iterator *iter, struct ftrace_graph_ent_entry *curr) { struct ring_buffer_iter *ring_iter; @@ -222,24 +222,33 @@ trace_branch_is_leaf(struct trace_iterator *iter, ring_iter = iter->buffer_iter[iter->cpu]; - if (!ring_iter) - return false; - - event = ring_buffer_iter_peek(ring_iter, NULL); + /* First peek to compare current entry and the next one */ + if (ring_iter) + event = ring_buffer_iter_peek(ring_iter, NULL); + else { + /* We need to consume the current entry to see the next one */ + ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); + event = ring_buffer_peek(iter->tr->buffer, iter->cpu, + NULL); + } if (!event) - return false; + return NULL; next = ring_buffer_event_data(event); if (next->ent.type != TRACE_GRAPH_RET) - return false; + return NULL; if (curr->ent.pid != next->ent.pid || curr->graph_ent.func != next->ret.func) - return false; + return NULL; - return true; + /* this is a leaf, now advance the iterator */ + if (ring_iter) + ring_buffer_read(ring_iter, NULL); + + return next; } /* Signal a overhead of time execution to the output */ @@ -376,18 +385,15 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s) /* Case of a leaf function on its call entry */ static enum print_line_t print_graph_entry_leaf(struct trace_iterator *iter, - struct 
ftrace_graph_ent_entry *entry, struct trace_seq *s) + struct ftrace_graph_ent_entry *entry, + struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) { - struct ftrace_graph_ret_entry *ret_entry; struct ftrace_graph_ret *graph_ret; - struct ring_buffer_event *event; struct ftrace_graph_ent *call; unsigned long long duration; int ret; int i; - event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); - ret_entry = ring_buffer_event_data(event); graph_ret = &ret_entry->ret; call = &entry->graph_ent; duration = graph_ret->rettime - graph_ret->calltime; @@ -457,7 +463,11 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry, if (!ret) return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + /* + * we already consumed the current entry to check the next one + * and see if this is a leaf. + */ + return TRACE_TYPE_NO_CONSUME; } static enum print_line_t @@ -469,6 +479,7 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, pid_t *last_entry = iter->private; struct trace_entry *ent = iter->ent; struct ftrace_graph_ent *call = &field->graph_ent; + struct ftrace_graph_ret_entry *leaf_ret; /* Pid */ if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE) @@ -504,8 +515,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } - if (trace_branch_is_leaf(iter, field)) - return print_graph_entry_leaf(iter, field, s); + leaf_ret = get_return_for_leaf(iter, field); + if (leaf_ret) + return print_graph_entry_leaf(iter, field, leaf_ret, s); else return print_graph_entry_nested(field, s, iter->ent->pid, cpu); -- cgit v1.2.3-70-g09d2 From 3c56819b14b00dd449bd776303e61f8532fad09f Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 9 Feb 2009 08:15:56 +0200 Subject: tracing: splice support for tracing_pipe Added and implemented tracing_pipe_fops->splice_read(). This allows userspace programs to get tracing data more efficiently. 
Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 6 +++ 2 files changed, 142 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b1e9a9e990..9e29fdb0dfe 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -364,6 +365,25 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) return cnt; } +ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) +{ + int len; + void *ret; + + if (s->len <= s->readpos) + return -EBUSY; + + len = s->len - s->readpos; + if (cnt > len) + cnt = len; + ret = memcpy(buf, s->buffer + s->readpos, cnt); + if (!ret) + return -EFAULT; + + s->readpos += len; + return cnt; +} + static void trace_print_seq(struct seq_file *m, struct trace_seq *s) { @@ -2493,6 +2513,121 @@ out: return sret; } +static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + __free_page(buf->page); +} + +static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, + unsigned int idx) +{ + __free_page(spd->pages[idx]); +} + +static struct pipe_buf_operations tracing_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = tracing_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t tracing_splice_read_pipe(struct file *filp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; + struct trace_iterator *iter = filp->private_data; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .nr_pages = 0, /* This gets updated below. 
*/ + .flags = flags, + .ops = &tracing_pipe_buf_ops, + .spd_release = tracing_spd_release_pipe, + }; + ssize_t ret; + size_t count, rem; + unsigned int i; + + mutex_lock(&trace_types_lock); + + if (iter->trace->splice_read) { + ret = iter->trace->splice_read(iter, filp, + ppos, pipe, len, flags); + if (ret) + goto out; + } + + ret = tracing_wait_pipe(filp); + if (ret <= 0) + goto out; + + if (!iter->ent && !find_next_entry_inc(iter)) { + ret = -EFAULT; + goto out; + } + + /* Fill as many pages as possible. */ + for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { + pages[i] = alloc_page(GFP_KERNEL); + + /* Seq buffer is page-sized, exactly what we need. */ + for (;;) { + count = iter->seq.len; + ret = print_trace_line(iter); + count = iter->seq.len - count; + if (rem < count) { + rem = 0; + iter->seq.len -= count; + break; + } + if (ret == TRACE_TYPE_PARTIAL_LINE) { + iter->seq.len -= count; + break; + } + + trace_consume(iter); + rem -= count; + if (!find_next_entry_inc(iter)) { + rem = 0; + iter->ent = NULL; + break; + } + } + + /* Copy the data into the page, so we can start over. 
*/ + ret = trace_seq_to_buffer(&iter->seq, + page_address(pages[i]), + iter->seq.len); + if (ret < 0) { + __free_page(pages[i]); + break; + } + partial[i].offset = 0; + partial[i].len = iter->seq.len; + + trace_seq_reset(&iter->seq); + } + + mutex_unlock(&trace_types_lock); + + spd.nr_pages = i; + + return splice_to_pipe(pipe, &spd); + +out: + mutex_unlock(&trace_types_lock); + + return ret; +} + static ssize_t tracing_entries_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -2656,6 +2791,7 @@ static struct file_operations tracing_pipe_fops = { .open = tracing_open_pipe, .poll = tracing_poll_pipe, .read = tracing_read_pipe, + .splice_read = tracing_splice_read_pipe, .release = tracing_release_pipe, }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7b0518adf6d..dbff0207b21 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -353,6 +353,12 @@ struct tracer { ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); + ssize_t (*splice_read)(struct trace_iterator *iter, + struct file *filp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); -- cgit v1.2.3-70-g09d2 From 6eaaa5d57e76c454479833fc8594cd7c3b75c789 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Feb 2009 02:25:00 +0100 Subject: tracing/core: use appropriate waiting on trace_pipe Impact: api and pipe waiting change Currently, the waiting used in tracing_read_pipe() is done through a 100 msecs schedule_timeout() loop which periodically check if there are traces on the buffer. This can cause small latencies for programs which are reading the incoming events. This patch makes the reader waiting for the trace_wait waitqueue except for few tracers such as the sched and functions tracers which might be already hold the runqueue lock while waking up the reader. 
This is performed through a new callback wait_pipe() on struct tracer. If none is implemented on a specific tracer, the default waiting for trace_wait queue is attached. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 62 ++++++++++++++++++++++++------------ kernel/trace/trace.h | 25 +++++++++++++-- kernel/trace/trace_functions.c | 1 + kernel/trace/trace_functions_graph.c | 1 + kernel/trace/trace_sched_switch.c | 1 + kernel/trace/trace_sched_wakeup.c | 1 + 6 files changed, 67 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dc61e82faad..881a94474d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -499,6 +499,9 @@ __acquires(kernel_lock) else if (!type->flags->opts) type->flags->opts = dummy_tracer_opt; + if (!type->wait_pipe) + type->wait_pipe = default_wait_pipe; + #ifdef CONFIG_FTRACE_STARTUP_TEST if (type->selftest && !tracing_selftest_disabled) { @@ -1064,7 +1067,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - trace_buffer_unlock_commit(tr, event, flags, pc); + + ring_buffer_unlock_commit(tr->buffer, event); + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); } void @@ -2392,6 +2398,38 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } + +void default_wait_pipe(struct trace_iterator *iter) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + + if (trace_empty(iter)) + schedule(); + + finish_wait(&trace_wait, &wait); +} + +/* + * This is a make-shift waitqueue. 
+ * A tracer might use this callback on some rare cases: + * + * 1) the current tracer might hold the runqueue lock when it wakes up + * a reader, hence a deadlock (sched, function, and function graph tracers) + * 2) the function tracers, trace all functions, we don't want + * the overhead of calling wake_up and friends + * (and tracing them too) + * + * Anyway, this is really very primitive wakeup. + */ +void poll_wait_pipe(struct trace_iterator *iter) +{ + set_current_state(TASK_INTERRUPTIBLE); + /* sleep for 100 msecs, and try again. */ + schedule_timeout(HZ / 10); +} + /* Must be called with trace_types_lock mutex held. */ static int tracing_wait_pipe(struct file *filp) { @@ -2403,30 +2441,14 @@ static int tracing_wait_pipe(struct file *filp) return -EAGAIN; } - /* - * This is a make-shift waitqueue. The reason we don't use - * an actual wait queue is because: - * 1) we only ever have one waiter - * 2) the tracing, traces all functions, we don't want - * the overhead of calling wake_up and friends - * (and tracing them too) - * Anyway, this is really very primitive wakeup. - */ - set_current_state(TASK_INTERRUPTIBLE); - iter->tr->waiter = current; - mutex_unlock(&trace_types_lock); - /* sleep for 100 msecs, and try again. 
*/ - schedule_timeout(HZ/10); + iter->trace->wait_pipe(iter); mutex_lock(&trace_types_lock); - iter->tr->waiter = NULL; - - if (signal_pending(current)) { + if (signal_pending(current)) return -EINTR; - } if (iter->trace != current_trace) return 0; @@ -2442,8 +2464,6 @@ static int tracing_wait_pipe(struct file *filp) */ if (!tracer_enabled && iter->pos) break; - - continue; } return 1; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dbff0207b21..eed732c151f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -337,18 +337,34 @@ struct tracer_flags { #define TRACER_OPT(s, b) .name = #s, .bit = b -/* - * A specific tracer, represented by methods that operate on a trace array: +/** + * struct tracer - a specific tracer and its callbacks to interact with debugfs + * @name: the name chosen to select it on the available_tracers file + * @init: called when one switches to this tracer (echo name > current_tracer) + * @reset: called when one switches to another tracer + * @start: called when tracing is unpaused (echo 1 > tracing_enabled) + * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @open: called when the trace file is opened + * @pipe_open: called when the trace_pipe file is opened + * @wait_pipe: override how the user waits for traces on trace_pipe + * @close: called when the trace file is released + * @read: override the default read callback on trace_pipe + * @splice_read: override the default splice_read callback on trace_pipe + * @selftest: selftest to run on boot (see trace_selftest.c) + * @print_headers: override the first lines that describe your columns + * @print_line: callback that prints a trace + * @set_flag: signals one of your private flags changed (trace_options file) + * @flags: your private flags */ struct tracer { const char *name; - /* Your tracer should raise a warning if init fails */ int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); 
void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); + void (*wait_pipe)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, @@ -432,6 +448,9 @@ void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); +void default_wait_pipe(struct trace_iterator *iter); +void poll_wait_pipe(struct trace_iterator *iter); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 4c113a8c466..c9a0b7df44f 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -225,6 +225,7 @@ static struct tracer function_trace __read_mostly = .init = function_trace_init, .reset = function_trace_reset, .start = function_trace_start, + .wait_pipe = poll_wait_pipe, .flags = &func_flags, .set_flag = func_set_flag, #ifdef CONFIG_FTRACE_SELFTEST diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 519a0cab153..0ff5cb66190 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -757,6 +757,7 @@ static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, .close = graph_trace_close, + .wait_pipe = poll_wait_pipe, .init = graph_trace_init, .reset = graph_trace_reset, .print_line = print_graph_function, diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 82fbb5a2df8..77132c2cf3d 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -221,6 +221,7 @@ static struct tracer sched_switch_trace __read_mostly = .reset = sched_switch_trace_reset, .start = sched_switch_trace_start, .stop = sched_switch_trace_stop, + .wait_pipe = poll_wait_pipe, #ifdef CONFIG_FTRACE_SELFTEST 
.selftest = trace_selftest_startup_sched_switch, #endif diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 276c51aaf31..db55f7aaa64 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -380,6 +380,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = .reset = wakeup_tracer_reset, .start = wakeup_tracer_start, .stop = wakeup_tracer_stop, + .wait_pipe = poll_wait_pipe, .print_max = 1, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, -- cgit v1.2.3-70-g09d2 From b04cc6b1f6398b0e0b60d37e27ce51b4899672ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 03:22:28 +0100 Subject: tracing/core: introduce per cpu tracing files Impact: split up tracing output per cpu Currently, on the tracing debugfs directory, three files are available to the user to let him extracting the trace output: - trace is an iterator through the ring-buffer. It's a reader but not a consumer It doesn't block when no more traces are available. - trace pretty similar to the former, except that it adds more informations such as prempt count, irq flag, ... - trace_pipe is a reader and a consumer, it will also block waiting for traces if necessary (heh, yes it's a pipe). The traces coming from different cpus are curretly mixed up inside these files. Sometimes it messes up the informations, sometimes it's useful, depending on what does the tracer capture. The tracing_cpumask file is useful to filter the output and select only the traces captured a custom defined set of cpus. But still it is not enough powerful to extract at the same time one trace buffer per cpu. So this patch creates a new directory: /debug/tracing/per_cpu/. Inside this directory, you will now find one trace_pipe file and one trace file per cpu. 
Which means if you have two cpus, you will have: trace0 trace1 trace_pipe0 trace_pipe1 And of course, reading these files will have the same effect than with the usual tracing files, except that you will only see the traces from the given cpu. The original all-in-one cpu trace file are still available on their original place. Until now, only one consumer was allowed on trace_pipe to avoid racy consuming on the ring-buffer. Now the approach changed a bit, you can have only one consumer per cpu. Which means you are allowed to read concurrently trace_pipe0 and trace_pipe1 But you can't have two readers on trace_pipe0 or trace_pipe1. Following the same logic, if there is one reader on the common trace_pipe, you can not have at the same time another reader on trace_pipe0 or in trace_pipe1. Because in trace_pipe is already a consumer in all cpu buffers in essence. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 168 +++++++++++++++++++++++++++++++++++++++++++-------- kernel/trace/trace.h | 3 + 2 files changed, 147 insertions(+), 24 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 11ba100f9a9..aa58b7bc847 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -98,6 +98,9 @@ static inline void ftrace_enable_cpu(void) static cpumask_var_t __read_mostly tracing_buffer_mask; +/* Define which cpu buffers are currently read in trace_pipe */ +static cpumask_var_t tracing_reader_cpumask; + #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) @@ -1195,10 +1198,25 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) { struct ring_buffer *buffer = iter->tr->buffer; struct trace_entry *ent, *next = NULL; + int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; int cpu; + /* + * If we are in a per_cpu trace file, don't bother by iterating over + * 
all cpu and peek directly. + */ + if (cpu_file > TRACE_PIPE_ALL_CPU) { + if (ring_buffer_empty_cpu(buffer, cpu_file)) + return NULL; + ent = peek_next_entry(iter, cpu_file, ent_ts); + if (ent_cpu) + *ent_cpu = cpu_file; + + return ent; + } + for_each_tracing_cpu(cpu) { if (ring_buffer_empty_cpu(buffer, cpu)) @@ -1279,6 +1297,7 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; + int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; @@ -1299,9 +1318,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) ftrace_disable_cpu(); - for_each_tracing_cpu(cpu) { - ring_buffer_iter_reset(iter->buffer_iter[cpu]); - } + if (cpu_file == TRACE_PIPE_ALL_CPU) { + for_each_tracing_cpu(cpu) + ring_buffer_iter_reset(iter->buffer_iter[cpu]); + } else + ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); + ftrace_enable_cpu(); @@ -1653,6 +1675,7 @@ static struct seq_operations tracer_seq_ops = { static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, int *ret) { + long cpu_file = (long) inode->i_private; struct trace_iterator *iter; struct seq_file *m; int cpu; @@ -1672,9 +1695,10 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (current_trace && current_trace->print_max) iter->tr = &max_tr; else - iter->tr = inode->i_private; + iter->tr = &global_trace; iter->trace = current_trace; iter->pos = -1; + iter->cpu_file = cpu_file; /* Notify the tracer early; before we stop tracing. 
*/ if (iter->trace && iter->trace->open) @@ -1684,14 +1708,22 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; + if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { + for_each_tracing_cpu(cpu) { - for_each_tracing_cpu(cpu) { + iter->buffer_iter[cpu] = + ring_buffer_read_start(iter->tr->buffer, cpu); + if (!iter->buffer_iter[cpu]) + goto fail_buffer; + } + } else { + cpu = iter->cpu_file; iter->buffer_iter[cpu] = - ring_buffer_read_start(iter->tr->buffer, cpu); + ring_buffer_read_start(iter->tr->buffer, cpu); if (!iter->buffer_iter[cpu]) - goto fail_buffer; + goto fail; } /* TODO stop tracer */ @@ -1715,6 +1747,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } +fail: mutex_unlock(&trace_types_lock); kfree(iter); @@ -2325,54 +2358,77 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, return cnt; } -static atomic_t tracing_reader; - static int tracing_open_pipe(struct inode *inode, struct file *filp) { + long cpu_file = (long) inode->i_private; struct trace_iterator *iter; + int ret = 0; if (tracing_disabled) return -ENODEV; - /* We only allow for reader of the pipe */ - if (atomic_inc_return(&tracing_reader) != 1) { - atomic_dec(&tracing_reader); - return -EBUSY; + mutex_lock(&trace_types_lock); + + /* We only allow one reader per cpu */ + if (cpu_file == TRACE_PIPE_ALL_CPU) { + if (!cpumask_empty(tracing_reader_cpumask)) { + ret = -EBUSY; + goto out; + } + cpumask_setall(tracing_reader_cpumask); + } else { + if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) + cpumask_set_cpu(cpu_file, tracing_reader_cpumask); + else { + ret = -EBUSY; + goto out; + } } /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return -ENOMEM; + if (!iter) { + ret = -ENOMEM; + goto out; + } if 
(!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { kfree(iter); - return -ENOMEM; + ret = -ENOMEM; + goto out; } - mutex_lock(&trace_types_lock); - /* trace pipe does not show start of buffer */ cpumask_setall(iter->started); + iter->cpu_file = cpu_file; iter->tr = &global_trace; iter->trace = current_trace; filp->private_data = iter; if (iter->trace->pipe_open) iter->trace->pipe_open(iter); - mutex_unlock(&trace_types_lock); - return 0; +out: + mutex_unlock(&trace_types_lock); + return ret; } static int tracing_release_pipe(struct inode *inode, struct file *file) { struct trace_iterator *iter = file->private_data; + mutex_lock(&trace_types_lock); + + if (iter->cpu_file == TRACE_PIPE_ALL_CPU) + cpumask_clear(tracing_reader_cpumask); + else + cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); + + mutex_unlock(&trace_types_lock); + free_cpumask_var(iter->started); kfree(iter); - atomic_dec(&tracing_reader); return 0; } @@ -2911,6 +2967,59 @@ struct dentry *tracing_init_dentry(void) return d_tracer; } +static struct dentry *d_percpu; + +struct dentry *tracing_dentry_percpu(void) +{ + static int once; + struct dentry *d_tracer; + + if (d_percpu) + return d_percpu; + + d_tracer = tracing_init_dentry(); + + if (!d_tracer) + return NULL; + + d_percpu = debugfs_create_dir("per_cpu", d_tracer); + + if (!d_percpu && !once) { + once = 1; + pr_warning("Could not create debugfs directory 'per_cpu'\n"); + return NULL; + } + + return d_percpu; +} + +static void tracing_init_debugfs_percpu(long cpu) +{ + struct dentry *d_percpu = tracing_dentry_percpu(); + struct dentry *entry; + /* strlen(trace_pipe) + MAX(log10(cpu)) + '\0' */ + char filename[17]; + + if (cpu > 999 || cpu < 0) + return; + + /* per cpu trace_pipe */ + sprintf(filename, "trace_pipe%ld", cpu); + + entry = debugfs_create_file(filename, 0444, d_percpu, + (void *) cpu, &tracing_pipe_fops); + if (!entry) + pr_warning("Could not create debugfs '%s' entry\n", filename); + + /* per cpu trace */ + sprintf(filename, 
"trace%ld", cpu); + + entry = debugfs_create_file(filename, 0444, d_percpu, + (void *) cpu, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs '%s' entry\n", filename); +} + #ifdef CONFIG_FTRACE_SELFTEST /* Let selftest have access to static functions in this file */ #include "trace_selftest.c" @@ -2920,6 +3029,7 @@ static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; struct dentry *entry; + int cpu; d_tracer = tracing_init_dentry(); @@ -2939,7 +3049,7 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); entry = debugfs_create_file("trace", 0444, d_tracer, - &global_trace, &tracing_fops); + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); if (!entry) pr_warning("Could not create debugfs 'trace' entry\n"); @@ -2970,8 +3080,8 @@ static __init int tracer_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs 'README' entry\n"); - entry = debugfs_create_file("trace_pipe", 0644, d_tracer, - NULL, &tracing_pipe_fops); + entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); if (!entry) pr_warning("Could not create debugfs " "'trace_pipe' entry\n"); @@ -2999,6 +3109,10 @@ static __init int tracer_init_debugfs(void) #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + + for_each_tracing_cpu(cpu) + tracing_init_debugfs_percpu(cpu); + return 0; } @@ -3222,8 +3336,12 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; + if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) + goto out_free_tracing_cpumask; + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); + cpumask_clear(tracing_reader_cpumask); /* TODO: make the number of buffers hot pluggable with CPUS */ global_trace.buffer = ring_buffer_alloc(trace_buf_size, @@ -3272,6 +3390,8 @@ __init static int 
tracer_alloc_buffers(void) ret = 0; out_free_cpumask: + free_cpumask_var(tracing_reader_cpumask); +out_free_tracing_cpumask: free_cpumask_var(tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index eed732c151f..508235a39da 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,8 @@ struct trace_seq { unsigned int readpos; }; +#define TRACE_PIPE_ALL_CPU -1 + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: @@ -404,6 +406,7 @@ struct trace_iterator { struct tracer *trace; void *private; struct ring_buffer_iter *buffer_iter[NR_CPUS]; + int cpu_file; /* The below is zeroed out in pipe_read */ struct trace_seq seq; -- cgit v1.2.3-70-g09d2 From d7350c3f45694104e820041969c8185c5f99e57c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 06:13:16 +0100 Subject: tracing/core: make the read callbacks reentrants Now that several per-cpu files can be read or spliced at the same, we want the read/splice callbacks for tracing files to be reentrants. Until now, a single global mutex (trace_types_lock) serialized the access to tracing_read_pipe(), tracing_splice_read_pipe(), and the seq helpers. Ie: it means that if a user tries to read trace_pipe0 and trace_pipe1 at the same time, the access to the function tracing_read_pipe() is contended and one reader must wait for the other to finish its read call. The trace_type_lock mutex is mostly here to serialize the access to the global current tracer (current_trace), which can be changed concurrently. Although the iter struct keeps a private pointer to this tracer, its callbacks can be changed by another function. The method used here is to not keep anymore private reference to the tracer inside the iterator but to make a copy of it inside the iterator. Then it checks on subsequents read calls if the tracer has changed. 
This is not costly because the current tracer is not expected to be changed often, so we use a branch prediction for that. Moreover, we add a private mutex to the iterator (there is one iterator per file descriptor) to serialize the accesses in case of multiple consumers per file descriptor (which would be a silly idea from the user). Note that this is not to protect the ring buffer, since the ring buffer already serializes the readers accesses. This is to prevent from traces weirdness in case of concurrent consumers. But these mutexes can be dropped anyway, that would not result in any crash. Just tell me what you think about it. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 101 ++++++++++++++++++++++++++++++++++++++++++--------- kernel/trace/trace.h | 3 +- 2 files changed, 85 insertions(+), 19 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aa58b7bc847..d8d899f3bd6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1294,20 +1294,32 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos) return ent; } +/* + * No necessary locking here. The worst thing which can + * happen is loosing events consumed at the same time + * by a trace_pipe reader. + * Other than that, we don't risk to crash the ring buffer + * because it serializes the readers. + * + * The current tracer is copied to avoid a global locking + * all around. 
+ */ static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; + static struct tracer *old_tracer; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - - if (!current_trace || current_trace != iter->trace) { - mutex_unlock(&trace_types_lock); - return NULL; + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; } + mutex_unlock(&trace_types_lock); atomic_inc(&trace_record_cmdline_disabled); @@ -1341,7 +1353,6 @@ static void *s_start(struct seq_file *m, loff_t *pos) static void s_stop(struct seq_file *m, void *p) { atomic_dec(&trace_record_cmdline_disabled); - mutex_unlock(&trace_types_lock); } static void print_lat_help_header(struct seq_file *m) @@ -1691,13 +1702,25 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) goto out; } + /* + * We make a copy of the current tracer to avoid concurrent + * changes on it while we are reading. + */ mutex_lock(&trace_types_lock); + iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); + if (!iter->trace) { + *ret = -ENOMEM; + goto fail; + } + if (current_trace) + *iter->trace = *current_trace; + if (current_trace && current_trace->print_max) iter->tr = &max_tr; else iter->tr = &global_trace; - iter->trace = current_trace; iter->pos = -1; + mutex_init(&iter->mutex); iter->cpu_file = cpu_file; /* Notify the tracer early; before we stop tracing. 
*/ @@ -1747,8 +1770,9 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } -fail: + fail: mutex_unlock(&trace_types_lock); + kfree(iter->trace); kfree(iter); return ERR_PTR(-ENOMEM); @@ -1783,6 +1807,8 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); seq_release(inode, file); + mutex_destroy(&iter->mutex); + kfree(iter->trace); kfree(iter); return 0; } @@ -2392,10 +2418,21 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) goto out; } + /* + * We make a copy of the current tracer to avoid concurrent + * changes on it while we are reading. + */ + iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); + if (!iter->trace) { + ret = -ENOMEM; + goto fail; + } + if (current_trace) + *iter->trace = *current_trace; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { - kfree(iter); ret = -ENOMEM; - goto out; + goto fail; } /* trace pipe does not show start of buffer */ @@ -2403,7 +2440,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->cpu_file = cpu_file; iter->tr = &global_trace; - iter->trace = current_trace; + mutex_init(&iter->mutex); filp->private_data = iter; if (iter->trace->pipe_open) @@ -2412,6 +2449,12 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) out: mutex_unlock(&trace_types_lock); return ret; + +fail: + kfree(iter->trace); + kfree(iter); + mutex_unlock(&trace_types_lock); + return ret; } static int tracing_release_pipe(struct inode *inode, struct file *file) @@ -2428,6 +2471,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); + mutex_destroy(&iter->mutex); + kfree(iter->trace); kfree(iter); return 0; @@ -2497,18 +2542,15 @@ static int tracing_wait_pipe(struct file *filp) return -EAGAIN; } - mutex_unlock(&trace_types_lock); + 
mutex_unlock(&iter->mutex); iter->trace->wait_pipe(iter); - mutex_lock(&trace_types_lock); + mutex_lock(&iter->mutex); if (signal_pending(current)) return -EINTR; - if (iter->trace != current_trace) - return 0; - /* * We block until we read something and tracing is disabled. * We still block if tracing is disabled, but we have never @@ -2533,6 +2575,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; + static struct tracer *old_tracer; ssize_t sret; /* return any leftover data */ @@ -2542,7 +2585,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, trace_seq_reset(&iter->seq); + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; + } + mutex_unlock(&trace_types_lock); + + /* + * Avoid more than one consumer on a single file descriptor + * This is just a matter of traces coherency, the ring buffer itself + * is protected. 
+ */ + mutex_lock(&iter->mutex); if (iter->trace->read) { sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); if (sret) @@ -2599,7 +2655,7 @@ waitagain: goto waitagain; out: - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); return sret; } @@ -2676,11 +2732,20 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; + static struct tracer *old_tracer; ssize_t ret; size_t rem; unsigned int i; + /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); + if (unlikely(old_tracer != current_trace && current_trace)) { + old_tracer = current_trace; + *iter->trace = *current_trace; + } + mutex_unlock(&trace_types_lock); + + mutex_lock(&iter->mutex); if (iter->trace->splice_read) { ret = iter->trace->splice_read(iter, filp, @@ -2720,14 +2785,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_reset(&iter->seq); } - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); spd.nr_pages = i; return splice_to_pipe(pipe, &spd); out_err: - mutex_unlock(&trace_types_lock); + mutex_unlock(&iter->mutex); return ret; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 508235a39da..632191770aa 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -405,8 +405,9 @@ struct trace_iterator { struct trace_array *tr; struct tracer *trace; void *private; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; /* The below is zeroed out in pipe_read */ struct trace_seq seq; -- cgit v1.2.3-70-g09d2 From ef5580d0fffce6e0a01043bac0625128b5d409a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:38:04 -0500 Subject: tracing: add interface to write into current tracer buffer Right now all tracers must manage their own trace buffers. 
This was to enforce tracers to be independent in case we finally decide to allow each tracer to have their own trace buffer. But now we are adding event tracing that writes to the current tracer's buffer. This adds an interface to allow events to write to the current tracer buffer without having to manage its own. Since event tracing has no "tracer", and is just a way to hook into any other tracer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 14 ++++++++++++++ kernel/trace/trace.h | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9c5987aca74..c5e39cd7310 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -846,6 +846,20 @@ void trace_buffer_unlock_commit(struct trace_array *tr, trace_wake_up(); } +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc) +{ + return trace_buffer_lock_reserve(&global_trace, + type, len, flags, pc); +} + +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + return trace_buffer_unlock_commit(&global_trace, event, flags, pc); +} + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 632191770aa..adf161f6dd1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -442,6 +442,12 @@ void trace_buffer_unlock_commit(struct trace_array *tr, struct ring_buffer_event *event, unsigned long flags, int pc); +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); -- cgit v1.2.3-70-g09d2 From 
c32e827b25054cb17b79cf97fb5e63ae4ce2223c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:12:30 -0500 Subject: tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it requires a printf format string and parsing. Although this adds the ability to get guaranteed information like task names and such, it takes a hit in processing overhead. This processing can add about 500-1000 nanoseconds of overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to take a "raw C" approach similar to the one Tom Zanussi used. But instead of requiring developers to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf.
So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STRUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note that name, proto, args, and fmt are all identical to what TRACE_FORMAT uses. name: the unique identifier of the trace point proto: the prototype that the trace point uses args: the args in the prototype fmt: printf format to use with the event printf tracer fmt_args: the printf arguments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the structure assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order in which the items are added in TRACE_STRUCT. An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates a unique struct of: struct { pid_t pid; int success; }; And the way the callback would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignment is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer.
They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/events.c | 6 +- kernel/trace/trace.h | 19 ++++ kernel/trace/trace_events.c | 2 +- kernel/trace/trace_events.h | 57 ---------- kernel/trace/trace_events_stage_1.h | 34 ++++++ kernel/trace/trace_events_stage_2.h | 72 ++++++++++++ kernel/trace/trace_events_stage_3.h | 219 ++++++++++++++++++++++++++++++++++++ 7 files changed, 350 insertions(+), 59 deletions(-) delete mode 100644 kernel/trace/trace_events.h create mode 100644 kernel/trace/trace_events_stage_1.h create mode 100644 kernel/trace/trace_events_stage_2.h create mode 100644 kernel/trace/trace_events_stage_3.h (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 4e4e45860c5..f2509cbaace 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -8,6 +8,10 @@ #include -#include "trace_events.h" +#include "trace_output.h" + +#include "trace_events_stage_1.h" +#include "trace_events_stage_2.h" +#include "trace_events_stage_3.h" #include diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index adf161f6dd1..aa1ab0cb80a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,4 +726,23 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + struct dentry *raw_dir; + int raw_enabled; + int (*raw_init)(void); + int (*raw_reg)(void); + void (*raw_unreg)(void); +}; + +void event_trace_printk(unsigned long ip, const char *fmt, ...); +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git 
a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b811eb34352..77a5c02bd63 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -10,7 +10,7 @@ #include #include -#include "trace_events.h" +#include "trace.h" #define TRACE_SYSTEM "TRACE_SYSTEM" diff --git a/kernel/trace/trace_events.h b/kernel/trace/trace_events.h deleted file mode 100644 index b015d7b1987..00000000000 --- a/kernel/trace/trace_events.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef _LINUX_KERNEL_TRACE_EVENTS_H -#define _LINUX_KERNEL_TRACE_EVENTS_H - -#include -#include -#include "trace.h" - -struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); -}; - - -#undef TPFMT -#define TPFMT(fmt, args...) fmt "\n", ##args - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (!ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = STR(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ -} - -void event_trace_printk(unsigned long ip, const char *fmt, ...); -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; - -#endif /* _LINUX_KERNEL_TRACE_EVENTS_H */ diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h new file mode 100644 index 00000000000..fd3bf9382d3 --- 
/dev/null +++ b/kernel/trace/trace_events_stage_1.h @@ -0,0 +1,34 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in to include the following: + * + * struct ftrace_raw_ { + * struct trace_entry ent; + * ; + * [...] + * }; + * + * The is created by the TRACE_FIELD(type, item, assign) + * macro. We simply do "type item;", and that will create the fields + * in the structure. + */ + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#define TRACE_FIELD(type, item, assign) \ + type item; + +#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h new file mode 100644 index 00000000000..3eaaef5f19e --- /dev/null +++ b/kernel/trace/trace_events_stage_2.h @@ -0,0 +1,72 @@ +/* + * Stage 2 of the trace events. + * + * Override the macros in to include the following: + * + * enum print_line_t + * ftrace_raw_output_(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_ *field; <-- defined in stage 1 + * struct trace_entry *entry; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * ret = trace_seq_printf(s, "%s", "\n"); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) 
args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + field->item, + + +#undef TPRAWFMT +#define TPRAWFMT(args...) args + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h new file mode 100644 index 00000000000..7a161c49deb --- /dev/null +++ b/kernel/trace/trace_events_stage_3.h @@ -0,0 +1,219 @@ +/* + * Stage 3 of the trace events. 
+ * + * Override the macros in to include the following: + * + * static void ftrace_event_(proto) + * { + * event_trace_printk(_RET_IP_, "() " ); + * } + * + * static int ftrace_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_event_); + * } + * + * For those macros defined with TRACE_FORMAT: + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * } + * + * + * For those macros defined with TRACE_EVENT_FORMAT: + * + * static struct ftrace_event_call event_; + * + * static void ftrace_raw_event_(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the TRACE_FIELD. 
+ * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_raw_event_); + * } + * + * static struct trace_event ftrace_event_type_ = { + * .trace = ftrace_raw_output_, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * .raw_init = ftrace_raw_init_event_, + * .raw_reg = ftrace_raw_reg_event_, + * .raw_unreg = ftrace_raw_unreg_event_, + * } + * + */ + +#undef TPFMT +#define TPFMT(fmt, args...) 
fmt "\n", ##args + +#define _TRACE_FORMAT(call, proto, args, fmt) \ +static void ftrace_event_##call(proto) \ +{ \ + event_trace_printk(_RET_IP_, "(" #call ") " fmt); \ +} \ + \ +static int ftrace_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_event_##call); \ + if (!ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call); \ + return ret; \ +} \ + \ +static void ftrace_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_event_##call); \ +} \ + + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = STR(TRACE_SYSTEM), \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ +} + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(struct ftrace_raw_##call), \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + tstruct; \ + \ + trace_current_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (!ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call); \ + 
return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = STR(TRACE_SYSTEM), \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .raw_reg = ftrace_raw_reg_event_##call, \ + .raw_unreg = ftrace_raw_unreg_event_##call, \ +} -- cgit v1.2.3-70-g09d2 From fd99498989f3b3feeab89dcadf537138ba136d24 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 28 Feb 2009 02:41:25 -0500 Subject: tracing: add raw fast tracing interface for trace events This patch adds the interface to enable the C style trace points. In the directory /debugfs/tracing/events/subsystem/event We now have three files: enable : values 0 or 1 to enable or disable the trace event. available_types: values 'raw' and 'printf' which indicate the tracing types available for the trace point. If a developer does not use the TRACE_EVENT_FORMAT macro and just uses the TRACE_FORMAT macro, then only 'printf' will be available. This file is read only. type: values 'raw' or 'printf'. This indicates which type of tracing is active for that trace point. 'printf' is the default and if 'raw' is not available, this file is read only. # echo raw > /debug/tracing/events/sched/sched_wakeup/type # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable Will enable the C style tracing for the sched_wakeup trace point. 
Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 7 ++ kernel/trace/trace_events.c | 199 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 181 insertions(+), 25 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index aa1ab0cb80a..f6fa0b9f83a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,6 +726,12 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* trace event type bit fields, not numeric */ +enum { + TRACE_EVENT_TYPE_PRINTF = 1, + TRACE_EVENT_TYPE_RAW = 2, +}; + struct ftrace_event_call { char *name; char *system; @@ -736,6 +742,7 @@ struct ftrace_event_call { int id; struct dentry *raw_dir; int raw_enabled; + int type; int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 77a5c02bd63..1d07f800a9c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -44,6 +44,36 @@ static void ftrace_clear_events(void) } } +static void ftrace_event_enable_disable(struct ftrace_event_call *call, + int enable) +{ + + switch (enable) { + case 0: + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + } + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + } + break; + case 1: + if (!call->enabled && + (call->type & TRACE_EVENT_TYPE_PRINTF)) { + call->enabled = 1; + call->regfunc(); + } + if (!call->raw_enabled && + (call->type & TRACE_EVENT_TYPE_RAW)) { + call->raw_enabled = 1; + call->raw_reg(); + } + break; + } +} + static int ftrace_set_clr_event(char *buf, int set) { struct ftrace_event_call *call = __start_ftrace_events; @@ -90,19 +120,8 @@ static int ftrace_set_clr_event(char *buf, int set) if (event && strcmp(event, call->name) != 0) continue; - if (set) { - /* Already set? */ - if (call->enabled) - return 0; - call->enabled = 1; - call->regfunc(); - } else { - /* Already cleared? 
*/ - if (!call->enabled) - return 0; - call->enabled = 0; - call->unregfunc(); - } + ftrace_event_enable_disable(call, set); + ret = 0; } return ret; @@ -273,7 +292,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled) + if (call->enabled || call->raw_enabled) buf = "1\n"; else buf = "0\n"; @@ -304,18 +323,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: - if (!call->enabled) - break; - - call->enabled = 0; - call->unregfunc(); - break; case 1: - if (call->enabled) - break; - - call->enabled = 1; - call->regfunc(); + ftrace_event_enable_disable(call, val); break; default: @@ -327,6 +336,107 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +event_type_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + if (call->type & TRACE_EVENT_TYPE_PRINTF) + r += sprintf(buf, "printf\n"); + + if (call->type & TRACE_EVENT_TYPE_RAW) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64]; + + /* + * If there's only one type, we can't change it. + * And currently we always have printf type, and we + * may or may not have raw type. + * + * This is a redundant check, the file should be read + * only if this is the case anyway. 
+ */ + + if (!call->raw_init) + return -EPERM; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (!strncmp(buf, "printf", 6) && + (!buf[6] || isspace(buf[6]))) { + + call->type = TRACE_EVENT_TYPE_PRINTF; + + /* + * If raw enabled, the disable it and enable + * printf type. + */ + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + + call->enabled = 1; + call->regfunc(); + } + + } else if (!strncmp(buf, "raw", 3) && + (!buf[3] || isspace(buf[3]))) { + + call->type = TRACE_EVENT_TYPE_RAW; + + /* + * If printf enabled, the disable it and enable + * raw type. + */ + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + + call->raw_enabled = 1; + call->raw_reg(); + } + } else + return -EINVAL; + + *ppos += cnt; + + return cnt; +} + +static ssize_t +event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + r += sprintf(buf, "printf\n"); + + if (call->raw_init) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -362,6 +472,17 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; +static const struct file_operations ftrace_type_fops = { + .open = tracing_open_generic, + .read = event_type_read, + .write = event_type_write, +}; + +static const struct file_operations ftrace_available_types_fops = { + .open = tracing_open_generic, + .read = event_available_types_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -427,6 +548,7 @@ static int event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) { struct dentry *entry; + int ret; /* * If the trace point header did not define TRACE_SYSTEM @@ -435,6 
+557,18 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) if (strcmp(call->system, "TRACE_SYSTEM") != 0) d_events = event_subsystem_dir(call->system, d_events); + if (call->raw_init) { + ret = call->raw_init(); + if (ret < 0) { + pr_warning("Could not initialize trace point" + " events/%s\n", call->name); + return ret; + } + } + + /* default the output to printf */ + call->type = TRACE_EVENT_TYPE_PRINTF; + call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -448,6 +582,21 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) pr_warning("Could not create debugfs " "'%s/enable' entry\n", call->name); + /* Only let type be writable, if we can change it */ + entry = debugfs_create_file("type", + call->raw_init ? 0644 : 0444, + call->dir, call, + &ftrace_type_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/type' entry\n", call->name); + + entry = debugfs_create_file("available_types", 0444, call->dir, call, + &ftrace_available_types_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/type' available_types\n", call->name); + return 0; } -- cgit v1.2.3-70-g09d2 From f9520750c4c9924c14325cd951efae5fae58104c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 14:04:40 -0500 Subject: tracing: make trace_seq_reset global and rename to trace_seq_init Impact: clean up The trace_seq functions may be used separately outside of the ftrace iterator. The trace_seq_reset is needed for these operations. This patch also renames trace_seq_reset to the more appropriate trace_seq_init. 
Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 17 +++++------------ kernel/trace/trace.h | 8 ++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c5e39cd7310..ea055aa21cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -342,13 +342,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(tsk); } -static void -trace_seq_reset(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) { int len; @@ -395,7 +388,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) s->buffer[len] = 0; seq_puts(m, s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } /** @@ -2620,7 +2613,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (sret != -EBUSY) return sret; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); @@ -2682,7 +2675,7 @@ waitagain: /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= iter->seq.len) - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* * If there was nothing to send to user, inspite of consuming trace @@ -2819,7 +2812,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, partial[i].offset = 0; partial[i].len = iter->seq.len; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); } mutex_unlock(&iter->mutex); @@ -3631,7 +3624,7 @@ trace_printk_seq(struct trace_seq *s) printk(KERN_TRACE "%s", s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } void ftrace_dump(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f6fa0b9f83a..cf6ba4181b1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,14 @@ struct trace_seq { unsigned int readpos; }; +static 
inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + + #define TRACE_PIPE_ALL_CPU -1 /* -- cgit v1.2.3-70-g09d2 From 981d081ec8b958b7d962ee40d433581a55d40fc5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 13:53:59 -0500 Subject: tracing: add format file to describe event struct fields This patch adds the "format" file to the trace point event directory. This is based off of work by Tom Zanussi, in which a file is exported to be tread from user land such that a user space app may read the binary record stored in the ring buffer. # cat /debug/tracing/events/sched/sched_switch/format field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 56 ++++++++++++++++++++++++++++++++++++- kernel/trace/trace_events_stage_2.h | 52 ++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 2 ++ 4 files changed, 110 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cf6ba4181b1..e606633fb49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -754,6 +754,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); + int (*show_format)(struct trace_seq *s); }; void event_trace_printk(unsigned long ip, const char *fmt, ...); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 26069fa6b3b..d57a772981c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3,6 +3,9 @@ * * Copyright (C) 2008 Red Hat Inc, Steven Rostedt * + * - Added format output of fields of the trace point. + * This was based off of work by Tom Zanussi . 
+ * */ #include @@ -444,6 +447,42 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +static ssize_t +event_format_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + char *buf; + int r; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + if (*ppos) + return 0; + + r = call->show_format(s); + if (!r) { + /* + * ug! The format output is bigger than a PAGE!! + */ + buf = "FORMAT TOO BIG\n"; + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, strlen(buf)); + goto out; + } + + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, s->len); + out: + kfree(s); + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -490,6 +529,11 @@ static const struct file_operations ftrace_available_types_fops = { .read = event_available_types_read, }; +static const struct file_operations ftrace_event_format_fops = { + .open = tracing_open_generic, + .read = event_format_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -602,7 +646,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) &ftrace_available_types_fops); if (!entry) pr_warning("Could not create debugfs " - "'%s/type' available_types\n", call->name); + "'%s/available_types' entry\n", call->name); + + /* A trace may not want to export its format */ + if (!call->show_format) + return 0; + + entry = debugfs_create_file("format", 0444, call->dir, call, + &ftrace_event_format_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/format' entry\n", call->name); return 0; } diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index dc79fe3a2ec..3a80ea4e92c 100644 --- a/kernel/trace/trace_events_stage_2.h +++ 
b/kernel/trace/trace_events_stage_2.h @@ -74,3 +74,55 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ } #include + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + return ret; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 2ab65e95822..c62a4d2a528 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -101,6 +101,7 @@ * .raw_init = ftrace_raw_init_event_, * .raw_reg = ftrace_raw_reg_event_, * .raw_unreg = ftrace_raw_unreg_event_, + * .show_format = ftrace_format_, * } * */ @@ -230,4 +231,5 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .raw_reg = ftrace_raw_reg_event_##call, \ .raw_unreg = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ } -- cgit v1.2.3-70-g09d2 From 2cadf9135eb3b6d84b6427314be827ddd443c308 Mon Sep 17 00:00:00 2001 From: Steven 
Rostedt Date: Mon, 1 Dec 2008 22:20:19 -0500 Subject: tracing: add binary buffer files for use with splice Impact: new feature This patch creates a directory of files that correspond to the per CPU ring buffers. These are binary files and are made to be used with splice. This is the fastest way to extract data from the ftrace ring buffers. Thanks to Jiaying Zhang for pushing me to get this code fixed, and to Eduard - Gabriel Munteanu for his splice code that helped me debug my code. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace.h | 1 + 2 files changed, 268 insertions(+), 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea055aa21cd..12539f72f4a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -11,31 +11,30 @@ * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 William Lee Irwin III */ +#include #include +#include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include #include "trace.h" #include "trace_output.h" @@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = { .write = tracing_mark_write, }; +struct ftrace_buffer_info { + struct trace_array *tr; + void *spare; + int cpu; + unsigned int read; +}; + +static int tracing_buffers_open(struct inode *inode, struct file *filp) +{ + int cpu = (int)(long)inode->i_private; + struct ftrace_buffer_info *info; + + if (tracing_disabled) + return -ENODEV; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->tr = &global_trace; + info->cpu = cpu; + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + /* Force reading ring buffer for first read */ + info->read = (unsigned int)-1; + if 
(!info->spare) + goto out; + + filp->private_data = info; + + return 0; + + out: + kfree(info); + return -ENOMEM; +} + +static ssize_t +tracing_buffers_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct ftrace_buffer_info *info = filp->private_data; + unsigned int pos; + ssize_t ret; + size_t size; + + /* Do we have previous read data to read? */ + if (info->read < PAGE_SIZE) + goto read; + + info->read = 0; + + ret = ring_buffer_read_page(info->tr->buffer, + &info->spare, + count, + info->cpu, 0); + if (ret < 0) + return 0; + + pos = ring_buffer_page_len(info->spare); + + if (pos < PAGE_SIZE) + memset(info->spare + pos, 0, PAGE_SIZE - pos); + +read: + size = PAGE_SIZE - info->read; + if (size > count) + size = count; + + ret = copy_to_user(ubuf, info->spare + info->read, size); + if (ret) + return -EFAULT; + *ppos += size; + info->read += size; + + return size; +} + +static int tracing_buffers_release(struct inode *inode, struct file *file) +{ + struct ftrace_buffer_info *info = file->private_data; + + ring_buffer_free_read_page(info->tr->buffer, info->spare); + kfree(info); + + return 0; +} + +struct buffer_ref { + struct ring_buffer *buffer; + void *page; + int ref; +}; + +static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + buf->private = 0; +} + +static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + ref->ref++; +} + +/* Pipe buffer operations for a buffer. 
*/ +static struct pipe_buf_operations buffer_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = buffer_pipe_buf_release, + .steal = buffer_pipe_buf_steal, + .get = buffer_pipe_buf_get, +}; + +/* + * Callback from splice_to_pipe(), if we need to release some pages + * at the end of the spd in case we error'ed out in filling the pipe. + */ +static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + struct buffer_ref *ref = + (struct buffer_ref *)spd->partial[i].private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + spd->partial[i].private = 0; +} + +static ssize_t +tracing_buffers_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct ftrace_buffer_info *info = file->private_data; + struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &buffer_pipe_buf_ops, + .spd_release = buffer_spd_release, + }; + struct buffer_ref *ref; + int size, i; + size_t ret; + + /* + * We can't seek on a buffer input + */ + if (unlikely(*ppos)) + return -ESPIPE; + + + for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { + struct page *page; + int r; + + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + break; + + ref->buffer = info->tr->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer); + if (!ref->page) { + kfree(ref); + break; + } + + r = ring_buffer_read_page(ref->buffer, &ref->page, + len, info->cpu, 0); + if (r < 0) { + ring_buffer_free_read_page(ref->buffer, + ref->page); + kfree(ref); + break; + } + + /* + * zero out any left over data, this is going to + * user land. 
+ */ + size = ring_buffer_page_len(ref->page); + if (size < PAGE_SIZE) + memset(ref->page + size, 0, PAGE_SIZE - size); + + page = virt_to_page(ref->page); + + spd.pages[i] = page; + spd.partial[i].len = PAGE_SIZE; + spd.partial[i].offset = 0; + spd.partial[i].private = (unsigned long)ref; + spd.nr_pages++; + } + + spd.nr_pages = i; + + /* did we read anything? */ + if (!spd.nr_pages) { + if (flags & SPLICE_F_NONBLOCK) + ret = -EAGAIN; + else + ret = 0; + /* TODO: block */ + return ret; + } + + ret = splice_to_pipe(pipe, &spd); + + return ret; +} + +static const struct file_operations tracing_buffers_fops = { + .open = tracing_buffers_open, + .read = tracing_buffers_read, + .release = tracing_buffers_release, + .splice_read = tracing_buffers_splice_read, + .llseek = no_llseek, +}; + #ifdef CONFIG_DYNAMIC_FTRACE int __weak ftrace_arch_read_dyn_info(char *buf, int size) @@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void) static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; + struct dentry *buffers; struct dentry *entry; int cpu; @@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_marker' entry\n"); + buffers = debugfs_create_dir("binary_buffers", d_tracer); + + if (!buffers) + pr_warning("Could not create buffers directory\n"); + else { + int cpu; + char buf[64]; + + for_each_tracing_cpu(cpu) { + sprintf(buf, "%d", cpu); + + entry = debugfs_create_file(buf, 0444, buffers, + (void *)(long)cpu, + &tracing_buffers_fops); + if (!entry) + pr_warning("Could not create debugfs buffers " + "'%s' entry\n", buf); + } + } + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e606633fb49..561bb5c5d98 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -217,6 +217,7 @@ enum trace_flag_type { */ struct trace_array_cpu { atomic_t disabled; + 
void *buffer_page; /* ring buffer spare */ /* these fields get copied into max-trace: */ unsigned long trace_idx; -- cgit v1.2.3-70-g09d2 From c032ef64d680717e4e8ce3da65da6419a35f8a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:34:24 -0500 Subject: tracing: add latency output format option With the removal of the latency_trace file, we lost the ability to see some of the finer details in a trace. Like the state of interrupts enabled, the preempt count, need resched, and if we are in an interrupt handler, softirq handler or not. This patch simply creates an option to bring back the old format. This also removes the warning about an unused variable that held the latency_trace file operations. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 24 ++---------------------- kernel/trace/trace.h | 3 ++- 2 files changed, 4 insertions(+), 23 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e53e6f0944..55fcbb56795 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -299,6 +299,7 @@ static const char *trace_options[] = { "sym-userobj", "printk-msg-only", "context-info", + "latency-format", NULL }; @@ -1829,26 +1830,12 @@ static int tracing_open(struct inode *inode, struct file *file) iter = __tracing_open(inode, file); if (IS_ERR(iter)) ret = PTR_ERR(iter); - - return ret; -} - -static int tracing_lt_open(struct inode *inode, struct file *file) -{ - struct trace_iterator *iter; - int ret = 0; - - iter = __tracing_open(inode, file); - - if (IS_ERR(iter)) - ret = PTR_ERR(iter); - else + else if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; return ret; } - static void * t_next(struct seq_file *m, void *v, loff_t *pos) { @@ -1927,13 +1914,6 @@ static struct file_operations tracing_fops = { .release = tracing_release, }; -static struct file_operations tracing_lt_fops = { - .open = tracing_lt_open, - .read = seq_read, - .llseek = seq_lseek, - 
.release = tracing_release, -}; - static struct file_operations show_traces_fops = { .open = show_traces_open, .read = seq_read, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 561bb5c5d98..12cd119cca3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -651,7 +651,8 @@ enum trace_iterator_flags { TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, TRACE_ITER_PRINTK_MSGONLY = 0x10000, - TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */ + TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ + TRACE_ITER_LATENCY_FMT = 0x40000, }; /* -- cgit v1.2.3-70-g09d2 From 5e1607a00bd082972629d3d68c95c8bcf902b55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 10:24:48 +0100 Subject: tracing: rename ftrace_printk() => trace_printk() Impact: cleanup Use a more generic name - this also allows the prototype to move to kernel.h and be generally available to kernel developers who want to do some quick tracing. Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 6 +++--- include/linux/ftrace.h | 18 +++++++++--------- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace.h | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 2041ee951c1..22614bef635 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -1466,11 +1466,11 @@ want, depending on your needs. 
You can put some comments on specific functions by using -ftrace_printk() For example, if you want to put a comment inside +trace_printk() For example, if you want to put a comment inside the __might_sleep() function, you just have to include - and call ftrace_printk() inside __might_sleep() + and call trace_printk() inside __might_sleep() -ftrace_printk("I'm a comment!\n") +trace_printk("I'm a comment!\n") will produce: diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f69ac7c158..fbb9c364e16 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -329,11 +329,11 @@ extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); /** - * ftrace_printk - printf formatting in the ftrace buffer + * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __ftrace_printk is an internal function for ftrace_printk and - * the @ip is passed in via the ftrace_printk macro. + * Note: __trace_printk is an internal function for trace_printk and + * the @ip is passed in via the trace_printk macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -341,14 +341,14 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); * where problems are occurring. * * This is intended as a debugging tool for the developer only. - * Please refrain from leaving ftrace_printks scattered around in + * Please refrain from leaving trace_printks scattered around in * your code. */ -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) +# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) extern int -__ftrace_printk(unsigned long ip, const char *fmt, ...) +__trace_printk(unsigned long ip, const char *fmt, ...) 
__attribute__ ((format (printf, 2, 3))); -# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap) +# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(void); @@ -356,13 +356,13 @@ extern void ftrace_dump(void); static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } static inline int -ftrace_printk(const char *fmt, ...) +trace_printk(const char *fmt, ...) { return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d1ef43999d9..c0e9c126339 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -48,7 +48,7 @@ unsigned long __read_mostly tracing_thresh; * We need to change this state when a selftest is running. * A selftest will lurk into the ring-buffer to count the * entries inserted during the selftest although some concurrent - * insertions into the ring-buffer such as ftrace_printk could occurred + * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ static bool __read_mostly tracing_selftest_running; @@ -291,7 +291,7 @@ static const char *trace_options[] = { "block", "stacktrace", "sched-tree", - "ftrace_printk", + "trace_printk", "ftrace_preempt", "branch", "annotate", @@ -3768,7 +3768,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vprintk); -int __ftrace_printk(unsigned long ip, const char *fmt, ...) +int __trace_printk(unsigned long ip, const char *fmt, ...) 
{ int ret; va_list ap; @@ -3781,7 +3781,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(__ftrace_printk); +EXPORT_SYMBOL_GPL(__trace_printk); int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 12cd119cca3..8beff03fda6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -115,7 +115,7 @@ struct userstack_entry { }; /* - * ftrace_printk entry: + * trace_printk entry: */ struct print_entry { struct trace_entry ent; -- cgit v1.2.3-70-g09d2 From 1427cdf0592368bdec57276edaf714040ee8744f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:47 +0100 Subject: tracing: infrastructure for supporting binary record Impact: save on memory for tracing Current tracers are typically using a struct (like struct ftrace_entry, struct ctx_switch_entry, struct special_entry, etc.) to record a binary event. These structs can only record their own kind of events. A new kind of tracer needs a new struct and a lot of code to handle it. So we need a generic binary record for events. This infrastructure is for this purpose.
[fweisbec@gmail.com: rebase against latest -tip, make it safe while sched tracing as reported by Steven Rostedt] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 ++ kernel/trace/Kconfig | 6 +++ kernel/trace/Makefile | 1 + kernel/trace/trace.c | 56 ++++++++++++++++++++++++++++ kernel/trace/trace.h | 12 ++++++ kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_output.c | 75 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 240 insertions(+) create mode 100644 kernel/trace/trace_bprintk.c (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 498769425eb..1c9cdca0258 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,6 +223,9 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef CONFIG_TRACE_BPRINTK +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); +#endif /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 058d949a321..ad8d3617d0a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -97,6 +97,12 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. 
+config TRACE_BPRINTK + bool "Binary printk for tracing" + default y + depends on TRACING + select BINARY_PRINTF + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index f44736c7574..46557ef4c37 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6144acf2b7..ff53509e19f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3792,6 +3792,62 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) } EXPORT_SYMBOL_GPL(__ftrace_vprintk); +/** + * trace_vbprintk - write binary msg to tracing buffer + * + * Caller must insure @fmt are valid when msg is in tracing buffer. 
+ */ +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct bprintk_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (tracing_disabled || !trace_bprintk_enable) + return 0; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vbprintk); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8beff03fda6..0f5077f8f95 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -124,6 +125,16 @@ struct print_entry { char buf[]; }; +struct bprintk_entry { + struct trace_entry ent; + unsigned long ip; + const char *fmt; + u32 buf[]; +}; +#ifdef CONFIG_TRACE_BPRINTK +extern int trace_bprintk_enable; +#endif + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -285,6 +296,7 @@ extern void 
__ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c new file mode 100644 index 00000000000..1f8e532c3fb --- /dev/null +++ b/kernel/trace/trace_bprintk.c @@ -0,0 +1,87 @@ +/* + * trace binary printk + * + * Copyright (C) 2008 Lai Jiangshan + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +/* binary printk basic */ +static DEFINE_MUTEX(btrace_mutex); +static int btrace_metadata_count; + +static inline void lock_btrace(void) +{ + mutex_lock(&btrace_mutex); +} + +static inline void unlock_btrace(void) +{ + mutex_unlock(&btrace_mutex); +} + +static void get_btrace_metadata(void) +{ + lock_btrace(); + btrace_metadata_count++; + unlock_btrace(); +} + +static void put_btrace_metadata(void) +{ + lock_btrace(); + btrace_metadata_count--; + unlock_btrace(); +} + +/* events tracer */ +int trace_bprintk_enable; + +static void start_bprintk_trace(struct trace_array *tr) +{ + get_btrace_metadata(); + tracing_reset_online_cpus(tr); + trace_bprintk_enable = 1; +} + +static void stop_bprintk_trace(struct trace_array *tr) +{ + trace_bprintk_enable = 0; + tracing_reset_online_cpus(tr); + put_btrace_metadata(); +} + +static int init_bprintk_trace(struct trace_array *tr) +{ + start_bprintk_trace(tr); + return 0; +} + +static struct tracer bprintk_trace __read_mostly = +{ + .name = "events", + .init = init_bprintk_trace, + .reset = stop_bprintk_trace, + .start = start_bprintk_trace, + .stop = stop_bprintk_trace, +}; + +static __init int init_bprintk(void) +{ + return 
register_tracer(&bprintk_trace); +} + +device_initcall(init_bprintk); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 306fef84c50..4ab71201862 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +static int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + int len = (PAGE_SIZE - 1) - s->len; + int ret; + + if (!len) + return 0; + + ret = bstr_printf(s->buffer + s->len, len, fmt, binary); + + /* If we can't write it all, don't bother writing anything */ + if (ret >= len) + return 0; + + s->len += ret; + + return len; +} + /** * trace_seq_puts - trace sequence printing of simple string * @s: trace sequence descriptor @@ -855,6 +875,60 @@ static struct trace_event trace_print_event = { .raw = trace_print_raw, }; +/* TRACE_BPRINTK */ +static enum print_line_t +trace_bprintk_print(struct trace_iterator *iter, int flags) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct bprintk_entry *field; + + trace_assign_type(field, entry); + + if (!seq_print_ip_sym(s, field->ip, flags)) + goto partial; + + if (!trace_seq_puts(s, ": ")) + goto partial; + + if (!trace_seq_bprintf(s, field->fmt, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t +trace_bprintk_raw(struct trace_iterator *iter, int flags) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct bprintk_entry *field; + + trace_assign_type(field, entry); + + if (!trace_seq_printf(s, ": %lx : ", field->ip)) + goto partial; + + if (!trace_seq_bprintf(s, field->fmt, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static struct trace_event trace_bprintk_event = { + .type = TRACE_BPRINTK, + .trace = trace_bprintk_print, + .raw = 
trace_bprintk_raw, + .hex = trace_nop_print, + .binary = trace_nop_print, +}; + static struct trace_event *events[] __initdata = { &trace_fn_event, &trace_ctx_event, @@ -863,6 +937,7 @@ static struct trace_event *events[] __initdata = { &trace_stack_event, &trace_user_stack_event, &trace_print_event, + &trace_bprintk_event, NULL }; -- cgit v1.2.3-70-g09d2 From 769b0441f438c4bb4872cb8560eb6fe51bcc09ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Mar 2009 17:21:49 +0100 Subject: tracing/core: drop the old trace_printk() implementation in favour of trace_bprintk() Impact: faster and lighter tracing Now that we have trace_bprintk() which is faster and consumes less memory than trace_printk() and has the same purpose, we can now drop the old implementation in favour of the binary one from trace_bprintk(), which means we move all the implementation of trace_bprintk() to trace_printk(), so the API doesn't change except that we must now use trace_seq_bprintf() to print the TRACE_PRINT entries. Some changes result from this: - Previously, trace_bprintk depended on a single tracer and couldn't work without it. This tracer has been dropped and the whole implementation of trace_printk() (like the module formats management) is now integrated in the tracing core (comes with CONFIG_TRACING), though we keep the file trace_printk.c (previously trace_bprintk.c) where we can find the module management. Thus we don't overflow trace.c - changes some parts to use trace_seq_bprintf() to print TRACE_PRINT entries. - change a bit trace_printk/trace_vprintk macros to support non-builtin format constants, and fix 'const' qualifiers warnings. But this is all transparent for developers. - etc...
V2: - Rebase against last changes - Fix mispell on the changelog V3: - Rebase against last changes (moving trace_printk() to kernel.h) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 25 ----- include/linux/kernel.h | 34 +++++- include/linux/module.h | 2 +- kernel/trace/Kconfig | 7 +- kernel/trace/Makefile | 2 +- kernel/trace/trace.c | 212 ++++++++++------------------------- kernel/trace/trace.h | 14 +-- kernel/trace/trace_bprintk.c | 154 ------------------------- kernel/trace/trace_functions_graph.c | 6 +- kernel/trace/trace_mmiotrace.c | 9 +- kernel/trace/trace_output.c | 70 ++---------- kernel/trace/trace_output.h | 2 + kernel/trace/trace_printk.c | 138 +++++++++++++++++++++++ 13 files changed, 262 insertions(+), 413 deletions(-) delete mode 100644 kernel/trace/trace_bprintk.c create mode 100644 kernel/trace/trace_printk.c (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1cc8ca453a9..e1583f2639b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,31 +223,6 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef CONFIG_TRACE_BPRINTK -extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); -extern int __trace_bprintk(unsigned long ip, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); - -static inline void ____trace_bprintk_check_format(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); -static inline void ____trace_bprintk_check_format(const char *fmt, ...) {} -#define __trace_bprintk_check_format(fmt, args...) \ -do { \ - if (0) \ - ____trace_bprintk_check_format(fmt, ##args); \ -} while (0) - -#define trace_bprintk(fmt, args...) 
\ -do { \ - static char *__attribute__((section("__trace_bprintk_fmt"))) \ - trace_bprintk_fmt = fmt; \ - __trace_bprintk_check_format(fmt, ##args); \ - __trace_bprintk(_THIS_IP_, trace_bprintk_fmt, ##args); \ -} while (0) -#else -#define trace_bprintk trace_printk -#endif - /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7aef15c4645..4e726b9a71e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -423,6 +423,16 @@ extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +static inline void __attribute__ ((format (printf, 1, 2))) +____trace_printk_check_format(const char *fmt, ...) +{ +} +#define __trace_printk_check_format(fmt, args...) \ +do { \ + if (0) \ + ____trace_printk_check_format(fmt, ##args); \ +} while (0) + /** * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing @@ -439,13 +449,31 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); * Please refrain from leaving trace_printks scattered around in * your code. */ -# define trace_printk(fmt...) __trace_printk(_THIS_IP_, fmt) + +#define trace_printk(fmt, args...) \ +do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))); \ + trace_printk_fmt = fmt; \ + __trace_printk_check_format(fmt, ##args); \ + __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \ +} while (0) + extern int __trace_printk(unsigned long ip, const char *fmt, ...) 
__attribute__ ((format (printf, 2, 3))); -# define ftrace_vprintk(fmt, ap) __trace_printk(_THIS_IP_, fmt, ap) + +#define ftrace_vprintk(fmt, vargs) \ +do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))); \ + trace_printk_fmt = fmt; \ + __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \ +} while (0) + extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); + extern void ftrace_dump(void); #else static inline void @@ -467,7 +495,7 @@ ftrace_vprintk(const char *fmt, va_list ap) return 0; } static inline void ftrace_dump(void) { } -#endif +#endif /* CONFIG_TRACING */ /* * Display an IP address in readable format. diff --git a/include/linux/module.h b/include/linux/module.h index 8cbec972d8e..22d9878e868 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -329,7 +329,7 @@ struct module unsigned int num_tracepoints; #endif -#ifdef CONFIG_TRACE_BPRINTK +#ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ad8d3617d0a..8e4a2a61cd7 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -52,6 +52,7 @@ config TRACING select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS select NOP_TRACER + select BINARY_PRINTF # # Minimum requirements an architecture has to meet for us to @@ -97,12 +98,6 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. 
-config TRACE_BPRINTK - bool "Binary printk for tracing" - default y - depends on TRACING - select BINARY_PRINTF - config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 46557ef4c37..c7a2943796e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o -obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o +obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46b3cd7a575..cc94f864248 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1169,6 +1169,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +/** + * trace_vprintk - write binary msg to tracing buffer + * + */ +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static DEFINE_SPINLOCK(trace_buf_lock); + static u32 trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + struct print_entry *entry; + unsigned long flags; + int resched; + int cpu, len = 0, size, pc; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + spin_lock_irqsave(&trace_buf_lock, flags); + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + if (len > TRACE_BUF_SIZE || len < 0) + goto out_unlock; + + size = 
sizeof(*entry) + sizeof(u32) * len; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + entry->fmt = fmt; + + memcpy(entry->buf, trace_buf, sizeof(u32) * len); + ring_buffer_unlock_commit(tr->buffer, event); + +out_unlock: + spin_unlock_irqrestore(&trace_buf_lock, flags); + +out: + ftrace_preempt_enable(resched); + unpause_graph_tracing(); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vprintk); + enum trace_file_type { TRACE_FILE_LAT_FMT = 1, TRACE_FILE_ANNOTATE = 2, @@ -1564,7 +1625,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%s", field->buf); + ret = trace_seq_bprintf(s, field->fmt, field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -3714,155 +3775,6 @@ static __init int tracer_init_debugfs(void) return 0; } -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) -{ - static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; - static char trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - int cpu, len = 0, size, pc; - struct print_entry *entry; - unsigned long irq_flags; - - if (tracing_disabled || tracing_selftest_running) - return 0; - - pc = preempt_count(); - preempt_disable_notrace(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - pause_graph_tracing(); - raw_local_irq_save(irq_flags); - __raw_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - len = min(len, TRACE_BUF_SIZE-1); - trace_buf[len] = 0; - - size = sizeof(*entry) + len + 1; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = 
ip; - entry->depth = depth; - - memcpy(&entry->buf, trace_buf, len); - entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event); - - out_unlock: - __raw_spin_unlock(&trace_buf_lock); - raw_local_irq_restore(irq_flags); - unpause_graph_tracing(); - out: - preempt_enable_notrace(); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vprintk); - -int __trace_printk(unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_printk); - -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) -{ - if (!(trace_flags & TRACE_ITER_PRINTK)) - return 0; - - return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); -} -EXPORT_SYMBOL_GPL(__ftrace_vprintk); - -/** - * trace_vbprintk - write binary msg to tracing buffer - * - * Caller must insure @fmt are valid when msg is in tracing buffer. 
- */ -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) -{ - static DEFINE_SPINLOCK(trace_buf_lock); - static u32 trace_buf[TRACE_BUF_SIZE]; - - struct ring_buffer_event *event; - struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; - struct bprintk_entry *entry; - unsigned long flags; - int resched; - int cpu, len = 0, size, pc; - - if (tracing_disabled || !trace_bprintk_enable) - return 0; - - pc = preempt_count(); - resched = ftrace_preempt_disable(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - - if (unlikely(atomic_read(&data->disabled))) - goto out; - - spin_lock_irqsave(&trace_buf_lock, flags); - len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - if (len > TRACE_BUF_SIZE || len < 0) - goto out_unlock; - - size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc); - if (!event) - goto out_unlock; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, trace_buf, sizeof(u32) * len); - ring_buffer_unlock_commit(tr->buffer, event); - -out_unlock: - spin_unlock_irqrestore(&trace_buf_lock, flags); - -out: - ftrace_preempt_enable(resched); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vbprintk); - -int __trace_bprintk(unsigned long ip, const char *fmt, ...) 
-{ - int ret; - va_list ap; - - if (!fmt) - return 0; - - va_start(ap, fmt); - ret = trace_vbprintk(ip, fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(__trace_bprintk); - static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0f5077f8f95..6140922392c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,7 +20,6 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, - TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -120,16 +119,10 @@ struct userstack_entry { */ struct print_entry { struct trace_entry ent; - unsigned long ip; + unsigned long ip; int depth; - char buf[]; -}; - -struct bprintk_entry { - struct trace_entry ent; - unsigned long ip; - const char *fmt; - u32 buf[]; + const char *fmt; + u32 buf[]; }; #ifdef CONFIG_TRACE_BPRINTK extern int trace_bprintk_enable; @@ -296,7 +289,6 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ - IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c deleted file mode 100644 index f4c245a5cd3..00000000000 --- a/kernel/trace/trace_bprintk.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * trace binary printk - * - * Copyright (C) 2008 Lai Jiangshan - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "trace.h" - -#ifdef CONFIG_MODULES - -/* binary printk basic */ -static DEFINE_MUTEX(btrace_mutex); -/* - * modules trace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt - * which are queued on trace_bprintk_fmt_list. 
- */ -static LIST_HEAD(trace_bprintk_fmt_list); - -struct trace_bprintk_fmt { - struct list_head list; - char fmt[0]; -}; - - -static inline void lock_btrace(void) -{ - mutex_lock(&btrace_mutex); -} - -static inline void unlock_btrace(void) -{ - mutex_unlock(&btrace_mutex); -} - - -static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) -{ - struct trace_bprintk_fmt *pos; - list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { - if (!strcmp(pos->fmt, fmt)) - return pos; - } - return NULL; -} - -static -void hold_module_trace_bprintk_format(const char **start, const char **end) -{ - const char **iter; - lock_btrace(); - for (iter = start; iter < end; iter++) { - struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); - if (tb_fmt) { - *iter = tb_fmt->fmt; - continue; - } - - tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) - + strlen(*iter) + 1, GFP_KERNEL); - if (tb_fmt) { - list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); - strcpy(tb_fmt->fmt, *iter); - *iter = tb_fmt->fmt; - } else - *iter = NULL; - } - unlock_btrace(); -} - -static int module_trace_bprintk_format_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - if (mod->num_trace_bprintk_fmt) { - const char **start = mod->trace_bprintk_fmt_start; - const char **end = start + mod->num_trace_bprintk_fmt; - - if (val == MODULE_STATE_COMING) - hold_module_trace_bprintk_format(start, end); - } - return 0; -} - -#else /* !CONFIG_MODULES */ -__init static int -module_trace_bprintk_format_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} -#endif /* CONFIG_MODULES */ - - -__initdata_or_module static -struct notifier_block module_trace_bprintk_format_nb = { - .notifier_call = module_trace_bprintk_format_notify, -}; - -/* events tracer */ -int trace_bprintk_enable; - -static void start_bprintk_trace(struct trace_array *tr) -{ - tracing_reset_online_cpus(tr); - trace_bprintk_enable = 1; -} - -static 
void stop_bprintk_trace(struct trace_array *tr) -{ - trace_bprintk_enable = 0; - tracing_reset_online_cpus(tr); -} - -static int init_bprintk_trace(struct trace_array *tr) -{ - start_bprintk_trace(tr); - return 0; -} - -static struct tracer bprintk_trace __read_mostly = -{ - .name = "events", - .init = init_bprintk_trace, - .reset = stop_bprintk_trace, - .start = start_bprintk_trace, - .stop = stop_bprintk_trace, -}; - -static __init int init_bprintk(void) -{ - int ret = register_module_notifier(&module_trace_bprintk_format_nb); - if (ret) - return ret; - - ret = register_tracer(&bprintk_trace); - if (ret) - unregister_module_notifier(&module_trace_bprintk_format_nb); - return ret; -} - -device_initcall(init_bprintk); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e527f2f66c7..453ebd3b636 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -742,7 +742,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s, } /* The comment */ - ret = trace_seq_printf(s, "/* %s", trace->buf); + ret = trace_seq_printf(s, "/* "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_bprintf(s, trace->fmt, trace->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index c401b908e80..23e346a734c 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -254,15 +254,18 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct print_entry *print = (struct print_entry *)entry; - const char *msg = print->buf; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret; /* The trailing newline must be in the message. 
*/ - ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); + ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_bprintf(s, print->fmt, print->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 4ab71201862..ef8fd661b21 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -53,8 +53,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } -static int -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { int len = (PAGE_SIZE - 1) - s->len; int ret; @@ -834,54 +833,12 @@ static struct trace_event trace_user_stack_event = { }; /* TRACE_PRINT */ -static enum print_line_t trace_print_print(struct trace_iterator *iter, - int flags) -{ - struct print_entry *field; - struct trace_seq *s = &iter->seq; - - trace_assign_type(field, iter->ent); - - if (!seq_print_ip_sym(s, field->ip, flags)) - goto partial; - - if (!trace_seq_printf(s, ": %s", field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) -{ - struct print_entry *field; - - trace_assign_type(field, iter->ent); - - if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) - goto partial; - - return TRACE_TYPE_HANDLED; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - -static struct trace_event trace_print_event = { - .type = TRACE_PRINT, - .trace = trace_print_print, - .raw = trace_print_raw, -}; - -/* TRACE_BPRINTK */ static enum print_line_t -trace_bprintk_print(struct trace_iterator *iter, int flags) +trace_print_print(struct trace_iterator *iter, int flags) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct bprintk_entry 
*field; + struct print_entry *field; trace_assign_type(field, entry); @@ -900,14 +857,13 @@ trace_bprintk_print(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static enum print_line_t -trace_bprintk_raw(struct trace_iterator *iter, int flags) + +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) { - struct trace_entry *entry = iter->ent; + struct print_entry *field; struct trace_seq *s = &iter->seq; - struct bprintk_entry *field; - trace_assign_type(field, entry); + trace_assign_type(field, iter->ent); if (!trace_seq_printf(s, ": %lx : ", field->ip)) goto partial; @@ -921,12 +877,11 @@ trace_bprintk_raw(struct trace_iterator *iter, int flags) return TRACE_TYPE_PARTIAL_LINE; } -static struct trace_event trace_bprintk_event = { - .type = TRACE_BPRINTK, - .trace = trace_bprintk_print, - .raw = trace_bprintk_raw, - .hex = trace_nop_print, - .binary = trace_nop_print, + +static struct trace_event trace_print_event = { + .type = TRACE_PRINT, + .trace = trace_print_print, + .raw = trace_print_raw, }; static struct trace_event *events[] __initdata = { @@ -937,7 +892,6 @@ static struct trace_event *events[] __initdata = { &trace_stack_event, &trace_user_stack_event, &trace_print_event, - &trace_bprintk_event, NULL }; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 8a34d688ed6..3b90e6ade1a 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -18,6 +18,8 @@ struct trace_event { extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
__attribute__ ((format (printf, 2, 3))); extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c new file mode 100644 index 00000000000..a50aea22e92 --- /dev/null +++ b/kernel/trace/trace_printk.c @@ -0,0 +1,138 @@ +/* + * trace binary printk + * + * Copyright (C) 2008 Lai Jiangshan + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +#ifdef CONFIG_MODULES + +/* + * modules trace_printk()'s formats are autosaved in struct trace_bprintk_fmt + * which are queued on trace_bprintk_fmt_list. + */ +static LIST_HEAD(trace_bprintk_fmt_list); + +/* serialize accesses to trace_bprintk_fmt_list */ +static DEFINE_MUTEX(btrace_mutex); + +struct trace_bprintk_fmt { + struct list_head list; + char fmt[0]; +}; + +static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) +{ + struct trace_bprintk_fmt *pos; + list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { + if (!strcmp(pos->fmt, fmt)) + return pos; + } + return NULL; +} + +static +void hold_module_trace_bprintk_format(const char **start, const char **end) +{ + const char **iter; + + mutex_lock(&btrace_mutex); + for (iter = start; iter < end; iter++) { + struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); + if (tb_fmt) { + *iter = tb_fmt->fmt; + continue; + } + + tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) + + strlen(*iter) + 1, GFP_KERNEL); + if (tb_fmt) { + list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); + strcpy(tb_fmt->fmt, *iter); + *iter = tb_fmt->fmt; + } else + *iter = NULL; + } + mutex_unlock(&btrace_mutex); +} + +static int module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void 
*data) +{ + struct module *mod = data; + if (mod->num_trace_bprintk_fmt) { + const char **start = mod->trace_bprintk_fmt_start; + const char **end = start + mod->num_trace_bprintk_fmt; + + if (val == MODULE_STATE_COMING) + hold_module_trace_bprintk_format(start, end); + } + return 0; +} + +#else /* !CONFIG_MODULES */ +__init static int +module_trace_bprintk_format_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + + +__initdata_or_module static +struct notifier_block module_trace_bprintk_format_nb = { + .notifier_call = module_trace_bprintk_format_notify, +}; + +int __trace_printk(unsigned long ip, const char *fmt, ...) + { + int ret; + va_list ap; + + if (unlikely(!fmt)) + return 0; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_printk); + +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) + { + if (unlikely(!fmt)) + return 0; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vprintk); + + +static __init int init_trace_printk(void) +{ + return register_module_notifier(&module_trace_bprintk_format_nb); +} + +early_initcall(init_trace_printk); -- cgit v1.2.3-70-g09d2 From 9de36825b321fe9fe9cf73260554251af579f4ca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 6 Mar 2009 17:52:03 +0100 Subject: tracing: trace_bprintk() cleanups Impact: cleanup Remove a few leftovers and clean up the code a bit. 
Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/module.c | 6 ------ kernel/trace/trace.h | 19 ++++++++----------- 2 files changed, 8 insertions(+), 17 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/module.c b/kernel/module.c index 2dece104f9a..22d7379709d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2158,12 +2158,6 @@ static noinline struct module *load_module(void __user *umod, &mod->num_tracepoints); #endif -#ifdef CONFIG_TRACE_BPRINTK - mod->trace_bprintk_fmt_start = section_objs(hdr, sechdrs, secstrings, - "__trace_bprintk_fmt", sizeof(char *), - &mod->num_trace_bprintk_fmt); -#endif - #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6140922392c..2bfb7d11fc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -119,14 +119,11 @@ struct userstack_entry { */ struct print_entry { struct trace_entry ent; - unsigned long ip; + unsigned long ip; int depth; const char *fmt; - u32 buf[]; + u32 buf[]; }; -#ifdef CONFIG_TRACE_BPRINTK -extern int trace_bprintk_enable; -#endif #define TRACE_OLD_SIZE 88 @@ -199,7 +196,7 @@ struct kmemtrace_free_entry { * trace_flag_type is an enumeration that holds different * states when a trace occurs. 
These are: * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags + * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler @@ -302,7 +299,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ - IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ @@ -325,8 +322,8 @@ enum print_line_t { * flags value in struct tracer_flags. */ struct tracer_opt { - const char *name; /* Will appear on the trace_options file */ - u32 bit; /* Mask assigned in val field in tracer_flags */ + const char *name; /* Will appear on the trace_options file */ + u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* @@ -335,7 +332,7 @@ struct tracer_opt { */ struct tracer_flags { u32 val; - struct tracer_opt *opts; + struct tracer_opt *opts; }; /* Makes more easy to define a tracer opt */ @@ -390,7 +387,7 @@ struct tracer { int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; int print_max; - struct tracer_flags *flags; + struct tracer_flags *flags; struct tracer_stat *stats; }; -- cgit v1.2.3-70-g09d2 From da4d03020c2af32f73e8bfbab0a66620d85bb9bb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Mar 2009 17:14:30 -0400 Subject: tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. 
This new macro is more C style than the old macro, and it is more obvious what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin.
) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the prototype of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT__entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. Thus the TP_printk gives a means to show the records in a human-readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record.
Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 + include/trace/sched_event_types.h | 48 +++++++---- kernel/trace/trace.h | 5 -- kernel/trace/trace_event_types.h | 3 +- kernel/trace/trace_events.c | 159 +----------------------------------- kernel/trace/trace_events_stage_1.h | 28 ++++--- kernel/trace/trace_events_stage_2.h | 89 ++++++++++++++++---- kernel/trace/trace_events_stage_3.h | 34 +++----- kernel/trace/trace_export.c | 23 +++++- 9 files changed, 159 insertions(+), 233 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 3bcc3e17144..6b4f1bb3701 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -160,4 +160,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) +#define TRACE_EVENT(name, proto, args, struct, print, assign) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #endif diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index 71b14828a95..aa77fb75403 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -62,25 +62,41 @@ TRACE_EVENT_FORMAT(sched_wakeup_new, TP_RAW_FMT("task %d success=%d") ); -TRACE_EVENT_FORMAT(sched_switch, +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
) + */ +TRACE_EVENT(sched_switch, + TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), + struct task_struct *next), + TP_ARGS(rq, prev, next), - TP_FMT("task %s:%d ==> %s:%d", - prev->comm, prev->pid, next->comm, next->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, prev_pid, prev->pid) - TRACE_FIELD(int, prev_prio, prev->prio) - TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], - next_comm, - TP_CMD(memcpy(TRACE_ENTRY->next_comm, - next->comm, - TASK_COMM_LEN))) - TRACE_FIELD(pid_t, next_pid, next->pid) - TRACE_FIELD(int, next_prio, next->prio) + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) ), - TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") - ); + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ) +); TRACE_EVENT_FORMAT(sched_migrate_task, TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2bfb7d11fc1..c5e1d8865fe 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -751,12 +751,7 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; - struct dentry *raw_dir; - int raw_enabled; - int type; int (*raw_init)(void); - int (*raw_reg)(void); - void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index d94179aa1fc..5cca4c978bd 100644 --- a/kernel/trace/trace_event_types.h +++ 
b/kernel/trace/trace_event_types.h @@ -106,9 +106,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), - TP_RAW_FMT("%08lx (%d) %s") + TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index fa32ca32076..1880a643809 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -59,22 +59,12 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, call->enabled = 0; call->unregfunc(); } - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - } break; case 1: - if (!call->enabled && - (call->type & TRACE_EVENT_TYPE_PRINTF)) { + if (!call->enabled) { call->enabled = 1; call->regfunc(); } - if (!call->raw_enabled && - (call->type & TRACE_EVENT_TYPE_RAW)) { - call->raw_enabled = 1; - call->raw_reg(); - } break; } } @@ -300,7 +290,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled || call->raw_enabled) + if (call->enabled) buf = "1\n"; else buf = "0\n"; @@ -346,107 +336,6 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -event_type_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - if (call->type & TRACE_EVENT_TYPE_PRINTF) - r += sprintf(buf, "printf\n"); - - if (call->type & TRACE_EVENT_TYPE_RAW) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char 
buf[64]; - - /* - * If there's only one type, we can't change it. - * And currently we always have printf type, and we - * may or may not have raw type. - * - * This is a redundant check, the file should be read - * only if this is the case anyway. - */ - - if (!call->raw_init) - return -EPERM; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - if (!strncmp(buf, "printf", 6) && - (!buf[6] || isspace(buf[6]))) { - - call->type = TRACE_EVENT_TYPE_PRINTF; - - /* - * If raw enabled, the disable it and enable - * printf type. - */ - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - - call->enabled = 1; - call->regfunc(); - } - - } else if (!strncmp(buf, "raw", 3) && - (!buf[3] || isspace(buf[3]))) { - - call->type = TRACE_EVENT_TYPE_RAW; - - /* - * If printf enabled, the disable it and enable - * raw type. - */ - if (call->enabled) { - call->enabled = 0; - call->unregfunc(); - - call->raw_enabled = 1; - call->raw_reg(); - } - } else - return -EINVAL; - - *ppos += cnt; - - return cnt; -} - -static ssize_t -event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - r += sprintf(buf, "printf\n"); - - if (call->raw_init) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - #undef FIELD #define FIELD(type, name) \ #type, #name, (unsigned int)offsetof(typeof(field), name), \ @@ -470,6 +359,7 @@ static int trace_write_header(struct trace_seq *s) FIELD(int, pid), FIELD(int, tgid)); } + static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -527,13 +417,6 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; -static const struct file_operations ftrace_avail_fops = { - .open = ftrace_event_seq_open, - .read = seq_read, - .llseek = seq_lseek, 
- .release = seq_release, -}; - static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, @@ -548,17 +431,6 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; -static const struct file_operations ftrace_type_fops = { - .open = tracing_open_generic, - .read = event_type_read, - .write = event_type_write, -}; - -static const struct file_operations ftrace_available_types_fops = { - .open = tracing_open_generic, - .read = event_available_types_read, -}; - static const struct file_operations ftrace_event_format_fops = { .open = tracing_open_generic, .read = event_format_read, @@ -647,9 +519,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } - /* default the output to printf */ - call->type = TRACE_EVENT_TYPE_PRINTF; - call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -665,21 +534,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } - /* Only let type be writable, if we can change it */ - entry = debugfs_create_file("type", - call->raw_init ? 
0644 : 0444, - call->dir, call, - &ftrace_type_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/type' entry\n", call->name); - - entry = debugfs_create_file("available_types", 0444, call->dir, call, - &ftrace_available_types_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/available_types' entry\n", call->name); - /* A trace may not want to export its format */ if (!call->show_format) return 0; @@ -704,13 +558,6 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, - (void *)&show_event_seq_ops, - &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); - entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 3830a731424..edfcbd3a0d1 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -18,19 +18,23 @@ #define TRACE_FORMAT(call, proto, args, fmt) #undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) + +#undef __array +#define __array(type, item, len) type item[len]; -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __field +#define __field(type, item) type item; -#define TRACE_FIELD(type, item, assign) \ - type item; -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - type_item; +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) 
args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name #include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 8e2e0f56c2a..d91bf4c5666 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -32,23 +32,14 @@ * in binary. */ -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __entry +#define __entry field -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - field->item, +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - field->item, - - -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -66,14 +57,76 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ - ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + ret = trace_seq_printf(s, print); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + #include -#include "trace_format.h" +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) 
args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry "REC" + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 41b82b93c9c..8e398d86409 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -144,27 +144,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TP_CMD -#define TP_CMD(cmd...) 
cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, raw) \ + TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - cmd; +#undef __entry +#define __entry entry -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ \ static struct ftrace_event_call event_##call; \ \ @@ -185,7 +173,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - tstruct; \ + assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ } \ @@ -226,10 +214,8 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ .raw_init = ftrace_raw_init_event_##call, \ - .raw_reg = ftrace_raw_reg_event_##call, \ - .raw_unreg = ftrace_raw_unreg_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index e62bc10f810..23ae78430d5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,7 +15,28 @@ #include "trace_output.h" -#include "trace_format.h" + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) 
args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ -- cgit v1.2.3-70-g09d2 From 1852fcce181faa237c010a3dbedb473cf9d4555f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 14:33:00 -0400 Subject: tracing: expand the ring buffers when an event is activated To save memory, the tracer ring buffers are set to a minimum. The activating of a trace expands the ring buffer size. This patch adds this expanding, when an event is activated. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 20 ++++++++++++++++++++ kernel/trace/trace.h | 3 +++ kernel/trace/trace_events.c | 8 ++++++++ 3 files changed, 31 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c1dc185085..35ee63ae412 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2357,6 +2357,26 @@ static int tracing_resize_ring_buffer(unsigned long size) return ret; } +/** + * tracing_update_buffers - used by tracing facility to expand ring buffers + * + * To save on memory when the tracing is never used on a system with it + * configured in. The ring buffers are set to a minimum size. But once + * a user starts to use the tracing facility, then they need to grow + * to their default size. + * + * This function is to be called when a tracer is about to be used. 
+ */ +int tracing_update_buffers(void) +{ + int ret = 0; + + if (!ring_buffer_expanded) + ret = tracing_resize_ring_buffer(trace_buf_size); + + return ret; +} + struct trace_option_dentry; static struct trace_option_dentry * diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe..336324d717f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -737,6 +737,9 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* set ring buffers to default size if not already done so */ +int tracing_update_buffers(void); + /* trace event type bit fields, not numeric */ enum { TRACE_EVENT_TYPE_PRINTF = 1, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 769dfd00fc8..ca624df7359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -141,6 +141,10 @@ ftrace_event_write(struct file *file, const char __user *ubuf, if (!cnt || cnt < 0) return 0; + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + ret = get_user(ch, ubuf++); if (ret) return ret; @@ -331,6 +335,10 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (ret < 0) return ret; + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + switch (val) { case 0: case 1: -- cgit v1.2.3-70-g09d2 From 48ead02030f849d011259244bb4ea9b985479006 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 12 Mar 2009 18:24:49 +0100 Subject: tracing/core: bring back raw trace_printk for dynamic formats strings Impact: fix callsites with dynamic format strings Since its new binary implementation, trace_printk() internally uses static containers for the format strings on each callsite. But the value is assigned once at build time, which means that it can't take dynamic formats. So this patch unearths the raw trace_printk implementation for the callers that will need trace_printk to be able to carry these dynamic format strings.
The trace_printk() macro will use the appropriate implementation for each callsite. Most of the time however, the binary implementation will still be used. The other impact of this patch is that mmiotrace_printk() will use the old implementation because it calls the low level trace_vprintk and we can't guess here whether the format passed in it is dynamic or not. Some parts of this patch have been written by Steven Rostedt (most notably the part that chooses the appropriate implementation for each callsites). Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 40 +++++++++++------ kernel/trace/trace.c | 85 +++++++++++++++++++++++++++++++++--- kernel/trace/trace.h | 13 +++++- kernel/trace/trace_event_types.h | 11 ++++- kernel/trace/trace_functions_graph.c | 6 +-- kernel/trace/trace_mmiotrace.c | 7 +-- kernel/trace/trace_output.c | 57 +++++++++++++++++++++--- kernel/trace/trace_printk.c | 33 +++++++++++--- 8 files changed, 213 insertions(+), 39 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7742798c920..1daca3b062b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -452,31 +452,45 @@ do { \ #define trace_printk(fmt, args...) \ do { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))); \ - \ - if (!trace_printk_fmt) \ - trace_printk_fmt = fmt; \ - \ __trace_printk_check_format(fmt, ##args); \ - __trace_printk(_THIS_IP_, trace_printk_fmt, ##args); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) +extern int +__trace_bprintk(unsigned long ip, const char *fmt, ...) 
+ __attribute__ ((format (printf, 2, 3))); + extern int __trace_printk(unsigned long ip, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement. + */ #define ftrace_vprintk(fmt, vargs) \ do { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))); \ - \ - if (!trace_printk_fmt) \ - trace_printk_fmt = fmt; \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ \ - __ftrace_vprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ + } else \ + __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ } while (0) +extern int +__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); + extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62a63b2b33d..dbb077d8a17 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1179,10 +1179,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace) /** - * trace_vprintk - write binary msg to tracing buffer + * trace_vbprintk - write binary msg to tracing buffer * */ -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -1191,7 +1191,7 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; - struct print_entry *entry; + struct bprint_entry *entry; unsigned long flags; int resched; int cpu, len = 0, size, pc; @@ -1219,7 +1219,7 @@ 
int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; size = sizeof(*entry) + sizeof(u32) * len; - event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc); + event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); @@ -1240,6 +1240,60 @@ out: return len; } +EXPORT_SYMBOL_GPL(trace_vbprintk); + +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +{ + static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; + static char trace_buf[TRACE_BUF_SIZE]; + + struct ring_buffer_event *event; + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + int cpu, len = 0, size, pc; + struct print_entry *entry; + unsigned long irq_flags; + + if (tracing_disabled || tracing_selftest_running) + return 0; + + pc = preempt_count(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + + if (unlikely(atomic_read(&data->disabled))) + goto out; + + pause_graph_tracing(); + raw_local_irq_save(irq_flags); + __raw_spin_lock(&trace_buf_lock); + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); + + len = min(len, TRACE_BUF_SIZE-1); + trace_buf[len] = 0; + + size = sizeof(*entry) + len + 1; + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); + if (!event) + goto out_unlock; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->depth = depth; + + memcpy(&entry->buf, trace_buf, len); + entry->buf[len] = 0; + ring_buffer_unlock_commit(tr->buffer, event); + + out_unlock: + __raw_spin_unlock(&trace_buf_lock); + raw_local_irq_restore(irq_flags); + unpause_graph_tracing(); + out: + preempt_enable_notrace(); + + return len; +} EXPORT_SYMBOL_GPL(trace_vprintk); enum trace_file_type { @@ -1628,6 +1682,22 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; } +static enum print_line_t 
print_bprintk_msg_only(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct bprint_entry *field; + int ret; + + trace_assign_type(field, entry); + + ret = trace_seq_bprintf(s, field->fmt, field->buf); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; @@ -1637,7 +1707,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_bprintf(s, field->fmt, field->buf); + ret = trace_seq_printf(s, "%s", field->buf); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -1702,6 +1772,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter) return ret; } + if (iter->ent->type == TRACE_BPRINT && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) + return print_bprintk_msg_only(iter); + if (iter->ent->type == TRACE_PRINT && trace_flags & TRACE_ITER_PRINTK && trace_flags & TRACE_ITER_PRINTK_MSGONLY) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 336324d717f..cede1ab49d0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINT, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -117,7 +118,7 @@ struct userstack_entry { /* * trace_printk entry: */ -struct print_entry { +struct bprint_entry { struct trace_entry ent; unsigned long ip; int depth; @@ -125,6 +126,13 @@ struct print_entry { u32 buf[]; }; +struct print_entry { + struct trace_entry ent; + unsigned long ip; + int depth; + char buf[]; +}; + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -286,6 +294,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, 
TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ @@ -570,6 +579,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern long ns2usecs(cycle_t nsec); extern int +trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); +extern int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); extern unsigned long trace_flags; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 5cca4c978bd..d0907d74642 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -102,7 +102,7 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); -TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, +TRACE_EVENT_FORMAT(bprint, TRACE_PRINT, bprint_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) @@ -112,6 +112,15 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); +TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ip, ip) + TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD_ZERO_CHAR(buf) + ), + TP_RAW_FMT("%08lx (%d) fmt:%p %s") +); + TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned int, line, line) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8566c14b3e9..4c388607ed6 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -684,7 +684,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, } static enum print_line_t -print_graph_comment(struct print_entry *trace, struct 
trace_seq *s, +print_graph_comment(struct bprint_entry *trace, struct trace_seq *s, struct trace_entry *ent, struct trace_iterator *iter) { int i; @@ -781,8 +781,8 @@ print_graph_function(struct trace_iterator *iter) trace_assign_type(field, entry); return print_graph_return(&field->ret, s, entry, iter); } - case TRACE_PRINT: { - struct print_entry *field; + case TRACE_BPRINT: { + struct bprint_entry *field; trace_assign_type(field, entry); return print_graph_comment(field, s, entry, iter); } diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 23e346a734c..f095916e477 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -254,6 +254,7 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct print_entry *print = (struct print_entry *)entry; + const char *msg = print->buf; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); @@ -261,11 +262,7 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) int ret; /* The trailing newline must be in the message. 
*/ - ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_bprintf(s, print->fmt, print->buf); + ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); if (!ret) return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 491832af9ba..ea9d3b410c7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -832,13 +832,13 @@ static struct trace_event trace_user_stack_event = { .binary = trace_special_bin, }; -/* TRACE_PRINT */ +/* TRACE_BPRINT */ static enum print_line_t -trace_print_print(struct trace_iterator *iter, int flags) +trace_bprint_print(struct trace_iterator *iter, int flags) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct print_entry *field; + struct bprint_entry *field; trace_assign_type(field, entry); @@ -858,9 +858,10 @@ trace_print_print(struct trace_iterator *iter, int flags) } -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) +static enum print_line_t +trace_bprint_raw(struct trace_iterator *iter, int flags) { - struct print_entry *field; + struct bprint_entry *field; struct trace_seq *s = &iter->seq; trace_assign_type(field, iter->ent); @@ -878,12 +879,55 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) } +static struct trace_event trace_bprint_event = { + .type = TRACE_BPRINT, + .trace = trace_bprint_print, + .raw = trace_bprint_raw, +}; + +/* TRACE_PRINT */ +static enum print_line_t trace_print_print(struct trace_iterator *iter, + int flags) +{ + struct print_entry *field; + struct trace_seq *s = &iter->seq; + + trace_assign_type(field, iter->ent); + + if (!seq_print_ip_sym(s, field->ip, flags)) + goto partial; + + if (!trace_seq_printf(s, ": %s", field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static enum print_line_t 
trace_print_raw(struct trace_iterator *iter, int flags) +{ + struct print_entry *field; + + trace_assign_type(field, iter->ent); + + if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) + goto partial; + + return TRACE_TYPE_HANDLED; + + partial: + return TRACE_TYPE_PARTIAL_LINE; +} + static struct trace_event trace_print_event = { - .type = TRACE_PRINT, + .type = TRACE_PRINT, .trace = trace_print_print, .raw = trace_print_raw, }; + static struct trace_event *events[] __initdata = { &trace_fn_event, &trace_ctx_event, @@ -891,6 +935,7 @@ static struct trace_event *events[] __initdata = { &trace_special_event, &trace_stack_event, &trace_user_stack_event, + &trace_bprint_event, &trace_print_event, NULL }; diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index a50aea22e92..f307a11e233 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -99,7 +99,7 @@ struct notifier_block module_trace_bprintk_format_nb = { .notifier_call = module_trace_bprintk_format_notify, }; -int __trace_printk(unsigned long ip, const char *fmt, ...) +int __trace_bprintk(unsigned long ip, const char *fmt, ...) { int ret; va_list ap; @@ -111,13 +111,13 @@ int __trace_printk(unsigned long ip, const char *fmt, ...) 
return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(__trace_printk); +EXPORT_SYMBOL_GPL(__trace_bprintk); -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap) { if (unlikely(!fmt)) return 0; @@ -125,11 +125,34 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) if (!(trace_flags & TRACE_ITER_PRINTK)) return 0; + return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); +} +EXPORT_SYMBOL_GPL(__ftrace_vbprintk); + +int __trace_printk(unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + va_start(ap, fmt); + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(__trace_printk); + +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) +{ + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); } EXPORT_SYMBOL_GPL(__ftrace_vprintk); - static __init int init_trace_printk(void) { return register_module_notifier(&module_trace_bprintk_format_nb); -- cgit v1.2.3-70-g09d2 From e9fb2b6d5845e24f104713591286b6f39761c027 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 14:19:25 -0400 Subject: tracing: have event_trace_printk use static tracer Impact: speed up on event tracing The event_trace_printk is currently a wrapper function that calls trace_vprintk. Because it uses a variable for the fmt it misses out on the optimization of using the binary printk. This patch makes event_trace_printk into a macro wrapper to use the fmt as the same as the trace_printks. 
Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 17 +++++++++++++++++ kernel/trace/trace_events.c | 10 ---------- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cede1ab49d0..35cfa7bbaf3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -773,4 +773,21 @@ void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +extern const char *__start___trace_bprintk_fmt[]; +extern const char *__stop___trace_bprintk_fmt[]; + +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ca624df7359..238ea95a411 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -24,16 +24,6 @@ static DEFINE_MUTEX(event_mutex); (unsigned long)event < (unsigned long)__stop_ftrace_events; \ event++) -void event_trace_printk(unsigned long ip, const char *fmt, ...) 
-{ - va_list ap; - - va_start(ap, fmt); - tracing_record_cmdline(current); - trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); - va_end(ap); -} - static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; -- cgit v1.2.3-70-g09d2 From bdc067582b8b71c7771bab076bbc51569c594fb4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 13 Mar 2009 00:12:52 -0400 Subject: tracing: add comment for use of double __builtin_consant_p Impact: documentation The use of the double __builtin_constant_p checks in the event_trace_printk can be confusing to developers and reviewers. This patch adds a comment to explain why it is there. Requested-by: KOSAKI Motohiro LKML-Reference: <20090313122235.43EB.A69D9226@jp.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 35cfa7bbaf3..67595b8f0f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -776,6 +776,11 @@ extern struct ftrace_event_call __stop_ftrace_events[]; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ #define event_trace_printk(ip, fmt, args...) \ do { \ __trace_printk_check_format(fmt, ##args); \ -- cgit v1.2.3-70-g09d2 From ee08c6eccb7d1295516f7cf420fddf7b14e9146f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Mar 2009 05:52:59 +0100 Subject: tracing/ftrace: syscall tracing infrastructure, basics Provide basic callbacks to do syscall tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan LKML-Reference: <1236401580-5758-2-git-send-email-fweisbec@gmail.com> [ simplified it to a trace_printk() for now.
] Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 21 ++++++++ kernel/trace/Kconfig | 10 ++++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 2 + kernel/trace/trace_syscalls.c | 113 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+) create mode 100644 kernel/trace/trace_syscalls.c (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e1583f2639b..c146c1021a2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -503,4 +503,25 @@ static inline void trace_hw_branch_oops(void) {} #endif /* CONFIG_HW_BRANCH_TRACER */ +/* + * A syscall entry in the ftrace syscalls array. + * + * @syscall_nr: syscall number + */ +struct syscall_trace_entry { + int syscall_nr; +}; + +#ifdef CONFIG_FTRACE_SYSCALLS +extern void start_ftrace_syscalls(void); +extern void stop_ftrace_syscalls(void); +extern void ftrace_syscall_enter(struct pt_regs *regs); +extern void ftrace_syscall_exit(struct pt_regs *regs); +#else +static inline void start_ftrace_syscalls(void) { } +static inline void stop_ftrace_syscalls(void) { } +static inline void ftrace_syscall_enter(struct pt_regs *regs) { } +static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8e4a2a61cd7..95a0ad191f1 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -34,6 +34,9 @@ config HAVE_FTRACE_MCOUNT_RECORD config HAVE_HW_BRANCH_TRACER bool +config HAVE_FTRACE_SYSCALLS + bool + config TRACER_MAX_TRACE bool @@ -175,6 +178,13 @@ config EVENT_TRACER allowing the user to pick and choose which trace point they want to trace. +config FTRACE_SYSCALLS + bool "Trace syscalls" + depends on HAVE_FTRACE_SYSCALLS + select TRACING + help + Basic tracer to catch the syscall entry and exit events. 
+ config BOOT_TRACER bool "Trace boot initcalls" select TRACING diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c7a2943796e..c3feea01c3e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -43,5 +43,6 @@ obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_EVENT_TRACER) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o +obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe..3d49daae47d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -30,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_SYSCALL_ENTER, + TRACE_SYSCALL_EXIT, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c new file mode 100644 index 00000000000..66cf97449af --- /dev/null +++ b/kernel/trace/trace_syscalls.c @@ -0,0 +1,113 @@ +#include +#include + +#include + +#include "trace_output.h" +#include "trace.h" + +static atomic_t refcount; + +void start_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_inc_return(&refcount) != 1) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_dec(&refcount); +} + +void stop_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_dec_return(&refcount)) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_inc(&refcount); +} + +void ftrace_syscall_enter(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + 
trace_printk("syscall %d enter\n", syscall_nr); +} + +void ftrace_syscall_exit(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + trace_printk("syscall %d exit\n", syscall_nr); +} + +static int init_syscall_tracer(struct trace_array *tr) +{ + start_ftrace_syscalls(); + + return 0; +} + +static void reset_syscall_tracer(struct trace_array *tr) +{ + stop_ftrace_syscalls(); +} + +static struct trace_event syscall_enter_event = { + .type = TRACE_SYSCALL_ENTER, +}; + +static struct trace_event syscall_exit_event = { + .type = TRACE_SYSCALL_EXIT, +}; + +static struct tracer syscall_tracer __read_mostly = { + .name = "syscall", + .init = init_syscall_tracer, + .reset = reset_syscall_tracer +}; + +__init int register_ftrace_syscalls(void) +{ + int ret; + + ret = register_ftrace_event(&syscall_enter_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_enter_event.type); + WARN_ON_ONCE(1); + } + + ret = register_ftrace_event(&syscall_exit_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_exit_event.type); + WARN_ON_ONCE(1); + } + + return register_tracer(&syscall_tracer); +} +device_initcall(register_ftrace_syscalls); -- cgit v1.2.3-70-g09d2 From bed1ffca022cc876fb83161d26670e9b5d3cf36b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Mar 2009 15:42:11 +0100 Subject: tracing/syscalls: core infrastructure for syscalls tracing, enhancements Impact: new feature This adds the generic support for syscalls tracing. This is currently exploited through a devoted tracer but other tracing engines can use it. (They just have to play with {start,stop}_ftrace_syscalls() and use the display callbacks unless they want to override them.) 
The syscalls prototypes definitions are abused here to steal some metadata information: - syscall name, param types, param names, number of params The syscall addr is not directly saved during this definition because we don't know if its prototype is available in the namespace. But we don't really need it. The arch has just to build a function able to resolve the syscall number to its metadata struct. The current tracer prints the syscall names, parameters names and values (and their types optionally). Currently the value is a raw hex but higher level values displaying is on my TODO list. Signed-off-by: Frederic Weisbecker LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 11 ++- include/linux/ftrace.h | 14 +++- include/linux/syscalls.h | 60 +++++++++++++++- kernel/trace/trace.h | 17 +++++ kernel/trace/trace_syscalls.c | 146 +++++++++++++++++++++++++++++++++++--- 5 files changed, 234 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0e0f39be6c8..d3bc3c86df6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -77,6 +77,14 @@ #define TRACE_PRINTKS() #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ + *(__syscalls_metadata) \ + VMLINUX_SYMBOL(__stop_syscalls_metadata) = .; +#else +#define TRACE_SYSCALLS() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -99,7 +107,8 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ - FTRACE_EVENTS() + FTRACE_EVENTS() \ + TRACE_SYSCALLS() #define RO_DATA(align) \ .
= ALIGN((align)); \ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index c146c1021a2..6dc1c652447 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -506,13 +506,21 @@ static inline void trace_hw_branch_oops(void) {} /* * A syscall entry in the ftrace syscalls array. * - * @syscall_nr: syscall number + * @name: name of the syscall + * @nb_args: number of parameters it takes + * @types: list of types as strings + * @args: list of args as strings (args[i] matches types[i]) */ -struct syscall_trace_entry { - int syscall_nr; +struct syscall_metadata { + const char *name; + int nb_args; + const char **types; + const char **args; }; #ifdef CONFIG_FTRACE_SYSCALLS +extern void arch_init_ftrace_syscalls(void); +extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern void start_ftrace_syscalls(void); extern void stop_ftrace_syscalls(void); extern void ftrace_syscall_enter(struct pt_regs *regs); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f9f900cfd06..0cff9bb80b0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -65,6 +65,7 @@ struct old_linux_dirent; #include #include #include +#include #define __SC_DECL1(t1, a1) t1 a1 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) @@ -95,7 +96,46 @@ struct old_linux_dirent; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_FTRACE_SYSCALLS +#define __SC_STR_ADECL1(t, a) #a +#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) +#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) +#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) +#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) +#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) + +#define __SC_STR_TDECL1(t, a) #t +#define __SC_STR_TDECL2(t, a, ...) 
#t, __SC_STR_TDECL1(__VA_ARGS__) +#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__) +#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) +#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) +#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_METADATA(sname, nb) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys"#sname, \ + .nb_args = nb, \ + .types = types_##sname, \ + .args = args_##sname, \ + } + +#define SYSCALL_DEFINE0(sname) \ + static const struct syscall_metadata __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("__syscalls_metadata"))) \ + __syscall_meta_##sname = { \ + .name = "sys_"#sname, \ + .nb_args = 0, \ + }; \ + asmlinkage long sys_##sname(void) + +#else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#endif + #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) @@ -117,10 +157,26 @@ struct old_linux_dirent; #endif #endif +#ifdef CONFIG_FTRACE_SYSCALLS +#define SYSCALL_DEFINEx(x, sname, ...) \ + static const char *types_##sname[] = { \ + __SC_STR_TDECL##x(__VA_ARGS__) \ + }; \ + static const char *args_##sname[] = { \ + __SC_STR_ADECL##x(__VA_ARGS__) \ + }; \ + SYSCALL_METADATA(sname, x); \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#else +#define SYSCALL_DEFINEx(x, sname, ...) \ + __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) +#endif + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS #define SYSCALL_DEFINE(name) static inline long SYSC_##name -#define SYSCALL_DEFINEx(x, name, ...) \ + +#define __SYSCALL_DEFINEx(x, name, ...) 
\ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ @@ -134,7 +190,7 @@ struct old_linux_dirent; #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ #define SYSCALL_DEFINE(name) asmlinkage long sys_##name -#define SYSCALL_DEFINEx(x, name, ...) \ +#define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3d49daae47d..d80ca0d464d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -194,6 +194,19 @@ struct kmemtrace_free_entry { const void *ptr; }; +struct syscall_trace_enter { + struct trace_entry ent; + int nr; + unsigned long args[]; +}; + +struct syscall_trace_exit { + struct trace_entry ent; + int nr; + unsigned long ret; +}; + + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -306,6 +319,10 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct syscall_trace_enter, \ + TRACE_SYSCALL_ENTER); \ + IF_ASSIGN(var, ent, struct syscall_trace_exit, \ + TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 66cf97449af..c72e599230f 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,6 +1,5 @@ -#include #include - +#include #include #include "trace_output.h" @@ -8,6 +7,90 @@ static atomic_t refcount; +/* Our two options */ +enum { + TRACE_SYSCALLS_OPT_TYPES = 0x1, +}; + +static struct tracer_opt syscalls_opts[] = { + { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, + { } +}; + +static struct tracer_flags syscalls_flags = { + .val = 0, /* By default: no args types */ + .opts = syscalls_opts +}; + +enum print_line_t 
+print_syscall_enter(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_enter *trace; + struct syscall_metadata *entry; + int i, ret, syscall; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) + goto end; + + ret = trace_seq_printf(s, "%s(", entry->name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + for (i = 0; i < entry->nb_args; i++) { + /* parameter types */ + if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { + ret = trace_seq_printf(s, "%s ", entry->types[i]); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + /* parameter values */ + ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], + trace->args[i], + i == entry->nb_args - 1 ? ")" : ","); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + +end: + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_syscall_exit(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_exit *trace; + int syscall; + struct syscall_metadata *entry; + int ret; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) { + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; + } + + ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, + trace->ret); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + void start_ftrace_syscalls(void) { unsigned long flags; @@ -16,6 +99,7 @@ void start_ftrace_syscalls(void) if (atomic_inc_return(&refcount) != 1) goto out; + arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { @@ -48,20 +132,63 @@ out: void ftrace_syscall_enter(struct pt_regs *regs) { + struct syscall_trace_enter *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + int size; int syscall_nr; + 
int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d enter\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, + 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } void ftrace_syscall_exit(struct pt_regs *regs) { + struct syscall_trace_exit *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; int syscall_nr; + int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d exit\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, + sizeof(*entry), 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + entry->ret = syscall_get_return_value(current, regs); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } static int init_syscall_tracer(struct trace_array *tr) @@ -77,17 +204,20 @@ static void reset_syscall_tracer(struct trace_array *tr) } static struct trace_event syscall_enter_event = { - .type = TRACE_SYSCALL_ENTER, + .type = TRACE_SYSCALL_ENTER, + .trace = print_syscall_enter, }; static struct trace_event syscall_exit_event = { - .type = TRACE_SYSCALL_EXIT, + .type = TRACE_SYSCALL_EXIT, + .trace = print_syscall_exit, }; static struct tracer syscall_tracer __read_mostly = { - .name = "syscall", + .name = "syscall", .init = init_syscall_tracer, - .reset = reset_syscall_tracer + .reset = reset_syscall_tracer, + .flags = &syscalls_flags, }; __init int register_ftrace_syscalls(void) -- cgit 
v1.2.3-70-g09d2 From 4ca530852346be239b7c19e7bec5d2b78855bebe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Mar 2009 19:20:15 -0400 Subject: tracing: protect reader of cmdline output Impact: fix to one cause of incorrect comm outputs in trace The spinlock only protected the creation of a comm <=> pid pair. But it was possible that a reader could look up a pid, and get the wrong comm because it had no locking. This also required changing trace_find_cmdline to copy the comm cache and not just send back a pointer to it. Signed-off-by: Steven Rostedt --- kernel/trace/blktrace.c | 23 ++++++++++++++++++----- kernel/trace/trace.c | 20 ++++++++++++-------- kernel/trace/trace.h | 2 +- kernel/trace/trace_functions_graph.c | 12 ++++++------ kernel/trace/trace_output.c | 18 ++++++++++++------ 5 files changed, 49 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1f32e4edf49..b171778e386 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1027,7 +1027,9 @@ static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { - const char *cmd = trace_find_cmdline(ent->pid); + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); if (t_sec(ent)) return trace_seq_printf(s, "%llu + %u [%s]\n", @@ -1057,19 +1059,30 @@ static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) { - return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid)); + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); + + return trace_seq_printf(s, "[%s]\n", cmd); } static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) { - return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid), - get_pdu_int(ent)); + char cmd[TASK_COMM_LEN]; + + 
trace_find_cmdline(ent->pid, cmd); + + return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); } static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) { + char cmd[TASK_COMM_LEN]; + + trace_find_cmdline(ent->pid, cmd); + return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), - get_pdu_int(ent), trace_find_cmdline(ent->pid)); + get_pdu_int(ent), cmd); } /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index efe3202c020..2796bd2b17e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -770,25 +770,29 @@ static void trace_save_cmdline(struct task_struct *tsk) __raw_spin_unlock(&trace_cmdline_lock); } -char *trace_find_cmdline(int pid) +void trace_find_cmdline(int pid, char comm[]) { - char *cmdline = "<...>"; unsigned map; - if (!pid) - return ""; + if (!pid) { + strcpy(comm, ""); + return; + } - if (pid > PID_MAX_DEFAULT) - goto out; + if (pid > PID_MAX_DEFAULT) { + strcpy(comm, "<...>"); + return; + } + __raw_spin_lock(&trace_cmdline_lock); map = map_pid_to_cmdline[pid]; if (map >= SAVED_CMDLINES) goto out; - cmdline = saved_cmdlines[map]; + strcpy(comm, saved_cmdlines[map]); out: - return cmdline; + __raw_spin_unlock(&trace_cmdline_lock); } void tracing_record_cmdline(struct task_struct *tsk) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 56ce34d90b0..b0ecad8ecc3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -547,7 +547,7 @@ struct tracer_switch_ops { }; #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ -extern char *trace_find_cmdline(int pid); +extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4c388607ed6..6004ccac2dd 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -190,15 +190,15 @@ print_graph_cpu(struct trace_seq *s, int cpu) static enum print_line_t 
print_graph_proc(struct trace_seq *s, pid_t pid) { - int i; - int ret; - int len; - char comm[8]; - int spaces = 0; + char comm[TASK_COMM_LEN]; /* sign + log10(MAX_INT) + '\0' */ char pid_str[11]; + int spaces = 0; + int ret; + int len; + int i; - strncpy(comm, trace_find_cmdline(pid), 7); + trace_find_cmdline(pid, comm); comm[7] = '\0'; sprintf(pid_str, "%d", pid); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index ea9d3b410c7..6a4c9dea191 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -309,9 +309,9 @@ static int lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) { int hardirq, softirq; - char *comm; + char comm[TASK_COMM_LEN]; - comm = trace_find_cmdline(entry->pid); + trace_find_cmdline(entry->pid, comm); hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; @@ -346,10 +346,12 @@ int trace_print_context(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - char *comm = trace_find_cmdline(entry->pid); unsigned long long t = ns2usecs(iter->ts); unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned long secs = (unsigned long)t; + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", comm, entry->pid, iter->cpu, secs, usec_rem); @@ -372,7 +374,10 @@ int trace_print_lat_context(struct trace_iterator *iter) rel_usecs = ns2usecs(next_ts - iter->ts); if (verbose) { - char *comm = trace_find_cmdline(entry->pid); + char comm[TASK_COMM_LEN]; + + trace_find_cmdline(entry->pid, comm); + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, @@ -577,14 +582,15 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, char *delim) { struct ctx_switch_entry *field; - char *comm; + char comm[TASK_COMM_LEN]; int S, T; + 
trace_assign_type(field, iter->ent); T = task_state_char(field->next_state); S = task_state_char(field->prev_state); - comm = trace_find_cmdline(field->next_pid); + trace_find_cmdline(field->next_pid, comm); if (!trace_seq_printf(&iter->seq, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", field->prev_pid, -- cgit v1.2.3-70-g09d2 From af4617bdba34aa556272b34c3986b0a4d588f568 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2009 18:09:55 -0400 Subject: tracing: add global-clock option to provide cross CPU clock to traces Impact: feature to allow better serialized clock This patch adds an option called "global-clock" that will allow the tracer to switch to a slower but more accurate (across CPUs) clock. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 34 ++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3be2f788e10..2f994caab0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -315,6 +315,7 @@ static const char *trace_options[] = { "printk-msg-only", "context-info", "latency-format", + "global-clock", NULL }; @@ -2251,6 +2252,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) return 0; } +static void set_tracer_flags(unsigned int mask, int enabled) +{ + /* do nothing if flag is already set */ + if (!!(trace_flags & mask) == !!enabled) + return; + + if (enabled) + trace_flags |= mask; + else + trace_flags &= ~mask; + + if (mask == TRACE_ITER_GLOBAL_CLK) { + u64 (*func)(void); + + if (enabled) + func = trace_clock_global; + else + func = trace_clock_local; + + mutex_lock(&trace_types_lock); + ring_buffer_set_clock(global_trace.buffer, func); + + if (max_tr.buffer) + ring_buffer_set_clock(max_tr.buffer, func); + mutex_unlock(&trace_types_lock); + } +} + static ssize_t tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t 
*ppos) @@ -2278,10 +2307,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, int len = strlen(trace_options[i]); if (strncmp(cmp, trace_options[i], len) == 0) { - if (neg) - trace_flags &= ~(1 << i); - else - trace_flags |= (1 << i); + set_tracer_flags(1 << i, !neg); break; } } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0ecad8ecc3..26a7a28ca11 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -667,6 +667,7 @@ enum trace_iterator_flags { TRACE_ITER_PRINTK_MSGONLY = 0x10000, TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ TRACE_ITER_LATENCY_FMT = 0x40000, + TRACE_ITER_GLOBAL_CLK = 0x80000, }; /* -- cgit v1.2.3-70-g09d2 From 40ce74f19c28077550646c76d96a075bf312e461 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Mar 2009 14:03:53 -0400 Subject: tracing: remove recording function depth from trace_printk The function depth in trace_printk was to facilitate the function graph output. Now that the function graph calculates the depth within the trace output, we no longer need to record the depth when the trace_printk is called. 
Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 +++----- kernel/trace/trace.h | 6 ++---- kernel/trace/trace_event_types.h | 2 -- kernel/trace/trace_mmiotrace.c | 2 +- kernel/trace/trace_printk.c | 8 ++++---- 5 files changed, 10 insertions(+), 16 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c637cb687cf..f7f359d4582 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1194,7 +1194,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) * trace_vbprintk - write binary msg to tracing buffer * */ -int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; @@ -1236,7 +1236,6 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; entry = ring_buffer_event_data(event); entry->ip = ip; - entry->depth = depth; entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); @@ -1254,7 +1253,7 @@ out: } EXPORT_SYMBOL_GPL(trace_vbprintk); -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; @@ -1291,7 +1290,6 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) goto out_unlock; entry = ring_buffer_event_data(event); entry->ip = ip; - entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; @@ -3140,7 +3138,7 @@ static int mark_printk(const char *fmt, ...) 
int ret; va_list args; va_start(args, fmt); - ret = trace_vprintk(0, -1, fmt, args); + ret = trace_vprintk(0, fmt, args); va_end(args); return ret; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 38276d1638e..7c9a0cbf5dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -123,7 +123,6 @@ struct userstack_entry { struct bprint_entry { struct trace_entry ent; unsigned long ip; - int depth; const char *fmt; u32 buf[]; }; @@ -131,7 +130,6 @@ struct bprint_entry { struct print_entry { struct trace_entry ent; unsigned long ip; - int depth; char buf[]; }; @@ -598,9 +596,9 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern long ns2usecs(cycle_t nsec); extern int -trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); +trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int -trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); +trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 019915063fe..fd78bee71dd 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -105,7 +105,6 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(unsigned int, depth, depth) TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), @@ -115,7 +114,6 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(unsigned int, depth, depth) TRACE_FIELD_ZERO_CHAR(buf) ), TP_RAW_FMT("%08lx (%d) fmt:%p %s") diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 
f095916e477..8e37fcddd8b 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -359,5 +359,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map) int mmio_trace_printk(const char *fmt, va_list args) { - return trace_vprintk(0, -1, fmt, args); + return trace_vprintk(0, fmt, args); } diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 486785214e3..eb81556107f 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -112,7 +112,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); + ret = trace_vbprintk(ip, fmt, ap); va_end(ap); return ret; } @@ -126,7 +126,7 @@ int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap) if (!(trace_flags & TRACE_ITER_PRINTK)) return 0; - return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap); + return trace_vbprintk(ip, fmt, ap); } EXPORT_SYMBOL_GPL(__ftrace_vbprintk); @@ -139,7 +139,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + ret = trace_vprintk(ip, fmt, ap); va_end(ap); return ret; } @@ -150,7 +150,7 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) if (!(trace_flags & TRACE_ITER_PRINTK)) return 0; - return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); + return trace_vprintk(ip, fmt, ap); } EXPORT_SYMBOL_GPL(__ftrace_vprintk); -- cgit v1.2.3-70-g09d2 From ac199db0189c091f2863312061c0575937f68810 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:15 +0100 Subject: ftrace: event profile hooks Impact: new tracing infrastructure feature Provide infrastructure to generate software perf counter events from tracepoints. 
Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <20090319194233.557364871@chello.nl> Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/events.c | 1 - kernel/trace/trace.h | 11 ++++++++++ kernel/trace/trace_event_profile.c | 31 ++++++++++++++++++++++++++ kernel/trace/trace_events.c | 9 ++------ kernel/trace/trace_events_stage_3.h | 44 +++++++++++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 8 deletions(-) create mode 100644 kernel/trace/trace_event_profile.c (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c3feea01c3e..0e45c206c2f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -44,5 +44,6 @@ obj-$(CONFIG_EVENT_TRACER) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o +obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o libftrace-y := ftrace.o diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 9fc918da404..246f2aa6dc4 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -12,4 +12,3 @@ #include "trace_events_stage_2.h" #include "trace_events_stage_3.h" -#include diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7c9a0cbf5dc..7cfb741be20 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -785,12 +785,23 @@ struct ftrace_event_call { int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif }; void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +#define for_each_event(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned 
long)__stop_ftrace_events; \ + event++) + extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c new file mode 100644 index 00000000000..22cba997077 --- /dev/null +++ b/kernel/trace/trace_event_profile.c @@ -0,0 +1,31 @@ +/* + * trace event based perf counter profiling + * + * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra + * + */ + +#include "trace.h" + +int ftrace_profile_enable(int event_id) +{ + struct ftrace_event_call *event; + + for_each_event(event) { + if (event->id == event_id) + return event->profile_enable(event); + } + + return -EINVAL; +} + +void ftrace_profile_disable(int event_id) +{ + struct ftrace_event_call *event; + + for_each_event(event) { + if (event->id == event_id) + return event->profile_disable(event); + } +} + diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7763db8fd0b..3047b56f663 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,11 +19,6 @@ static DEFINE_MUTEX(event_mutex); -#define events_for_each(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; @@ -90,7 +85,7 @@ static int ftrace_set_clr_event(char *buf, int set) } mutex_lock(&event_mutex); - events_for_each(call) { + for_each_event(call) { if (!call->name || !call->regfunc) continue; @@ -628,7 +623,7 @@ static __init int event_trace_init(void) if (!d_events) return 0; - events_for_each(call) { + for_each_event(call) { /* The linker may leave blanks */ if (!call->name) continue; diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 4c26d97b450..6b3261ca988 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -109,6 +109,40 @@ #undef TP_FMT 
#define TP_FMT(fmt, args...) fmt "\n", ##args +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +{ \ + if (atomic_add_negative(-1, &call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + #define _TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ { \ @@ -147,6 +181,7 @@ static int ftrace_init_event_##call(void) \ #undef TRACE_FORMAT #define TRACE_FORMAT(call, proto, args, fmt) \ _TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ @@ -155,6 +190,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_init_event_##call, \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ + _TRACE_PROFILE_INIT(call) \ } #undef __entry @@ -162,6 +198,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -227,4 +264,11 @@ 
__attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ + _TRACE_PROFILE_INIT(call) \ } + +#include + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + -- cgit v1.2.3-70-g09d2 From cf027f645e6aee4f0ca6197a6b6a57f327fdb13f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:30:39 -0500 Subject: tracing: add run-time field descriptions for event filtering This patch makes the field descriptions defined for event tracing available at run-time, for the event-filtering mechanism introduced in a subsequent patch. The common event fields are prepended with 'common_' in the format display, allowing them to be distinguished from the other fields that might internally have same name and can therefore be unambiguously used in filters. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710639.7703.46.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 30 +++++++++++++++++-------- kernel/trace/trace_events.c | 40 ++++++++++++++++++++++++++++++++- kernel/trace/trace_events_stage_2.h | 45 +++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 2 ++ 4 files changed, 107 insertions(+), 10 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be20..9288dc7ad14 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -775,16 +775,26 @@ enum { TRACE_EVENT_TYPE_RAW = 2, }; +struct ftrace_event_field { + struct list_head link; + char *name; + char *type; + int offset; + int size; +}; + struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + 
void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -793,6 +803,8 @@ struct ftrace_event_call { #endif }; +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3047b56f663..961b057da28 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,6 +19,34 @@ static DEFINE_MUTEX(event_mutex); +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size) +{ + struct ftrace_event_field *field; + + field = kmalloc(sizeof(*field), GFP_KERNEL); + if (!field) + goto err; + field->name = kstrdup(name, GFP_KERNEL); + if (!field->name) + goto err; + field->type = kstrdup(type, GFP_KERNEL); + if (!field->type) + goto err; + field->offset = offset; + field->size = size; + list_add(&field->link, &call->fields); + + return 0; +err: + if (field) { + kfree(field->name); + kfree(field->type); + } + kfree(field); + return -ENOMEM; +} + static void ftrace_clear_events(void) { struct ftrace_event_call *call = (void *)__start_ftrace_events; @@ -343,7 +371,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, #undef FIELD #define FIELD(type, name) \ - #type, #name, offsetof(typeof(field), name), sizeof(field.name) + #type, "common_" #name, offsetof(typeof(field), name), \ + sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -581,6 +610,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) call->name); } + if (call->define_fields) { + ret = call->define_fields(); + if (ret < 0) { + 
pr_warning("Could not initialize trace point" + " events/%s\n", call->name); + return ret; + } + } + /* A trace may not want to export its format */ if (!call->show_format) return 0; diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 5117c43f5c6..30743f7d411 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \ } #include + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 6b3261ca988..468938f7014 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -252,6 +252,7 @@ static int ftrace_raw_init_event_##call(void) \ if (!id) \ return -ENODEV; \ event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ } \ \ @@ -264,6 +265,7 @@ 
__attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ _TRACE_PROFILE_INIT(call) \ } -- cgit v1.2.3-70-g09d2 From 7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:04 -0500 Subject: tracing: add per-event filtering This patch adds per-event filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each event directory. This file can be written to to set filters; reading from it will display the current set of filters set for that event. Basically, any field listed in the 'format' file for an event can be filtered on (including strings, but not yet other array types) using either matching ('==') or non-matching ('!=') 'predicates'. A 'predicate' can be either a single expression: # echo pid != 0 > filter # cat filter pid != 0 or a compound expression of up to 8 sub-expressions combined using '&&' or '||': # echo comm == Xorg > filter # echo "&& sig != 29" > filter # cat filter comm == Xorg && sig != 29 Only events having field values matching an expression will be available in the trace output; non-matching events are discarded. Note that a compound expression is built up by echoing each sub-expression separately - it's not the most efficient way to do things, but it keeps the parser simple and assumes that compound expressions will be relatively uncommon. In any case, a subsequent patch introducing a way to set filters for entire subsystems should mitigate any need to do this for lots of events. 
Setting a filter without an '&&' or '||' clears the previous filter completely and sets the filter to the new expression: # cat filter comm == Xorg && sig != 29 # echo comm != Xorg # cat filter comm != Xorg To clear a filter, echo 0 to the filter file: # echo 0 > filter # cat filter none The limit of 8 predicates for a compound expression is arbitrary - for efficiency, it's implemented as an array of pointers to predicates, and 8 seemed more than enough for any filter... Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710665.7703.48.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/Makefile | 1 + kernel/trace/trace.h | 28 ++++ kernel/trace/trace_events.c | 77 +++++++++ kernel/trace/trace_events_filter.c | 326 ++++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 4 + 5 files changed, 436 insertions(+) create mode 100644 kernel/trace/trace_events_filter.c (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0e45c206c2f..2630f5121ec 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -45,5 +45,6 @@ obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACER) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o +obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9288dc7ad14..d9eb39e4bb3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -795,6 +795,7 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; + struct filter_pred **preds; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -803,8 +804,35 @@ struct ftrace_event_call { #endif }; +#define MAX_FILTER_PRED 8 + +struct filter_pred; + +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); + +struct filter_pred { + filter_pred_fn_t 
fn; + u64 val; + char *str_val; + int str_len; + char *field_name; + int offset; + int not; + int or; + int compound; + int clear; +}; + int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); +extern void filter_free_pred(struct filter_pred *pred); +extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern int filter_parse(char **pbuf, struct filter_pred *pred); +extern int filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred); +extern void filter_free_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); + void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 961b057da28..97470c48956 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -459,6 +459,71 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) return r; } +static ssize_t +event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + r = filter_print_preds(call->preds, s->buffer); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + + kfree(s); + + return r; +} + +static ssize_t +event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64], *pbuf = buf; + struct filter_pred *pred; + int err; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return 
-ENOMEM; + + err = filter_parse(&pbuf, pred); + if (err < 0) { + filter_free_pred(pred); + return err; + } + + if (pred->clear) { + filter_free_preds(call); + return cnt; + } + + if (filter_add_pred(call, pred)) { + filter_free_pred(pred); + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -504,6 +569,12 @@ static const struct file_operations ftrace_event_id_fops = { .read = event_id_read, }; +static const struct file_operations ftrace_event_filter_fops = { + .open = tracing_open_generic, + .read = event_filter_read, + .write = event_filter_write, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -619,6 +690,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } + entry = debugfs_create_file("filter", 0444, call->dir, call, + &ftrace_event_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", call->name); + /* A trace may not want to export its format */ if (!call->show_format) return 0; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c new file mode 100644 index 00000000000..8e8c5fa25be --- /dev/null +++ b/kernel/trace/trace_events_filter.c @@ -0,0 +1,326 @@ +/* + * trace_events_filter - generic event filtering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2009 Tom Zanussi + */ + +#include +#include +#include +#include + +#include "trace.h" + +static int filter_pred_64(struct filter_pred *pred, void *event) +{ + u64 *addr = (u64 *)(event + pred->offset); + u64 val = (u64)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_32(struct filter_pred *pred, void *event) +{ + u32 *addr = (u32 *)(event + pred->offset); + u32 val = (u32)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_16(struct filter_pred *pred, void *event) +{ + u16 *addr = (u16 *)(event + pred->offset); + u16 val = (u16)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_8(struct filter_pred *pred, void *event) +{ + u8 *addr = (u8 *)(event + pred->offset); + u8 val = (u8)pred->val; + int match; + + match = (val == *addr) ^ pred->not; + + return match; +} + +static int filter_pred_string(struct filter_pred *pred, void *event) +{ + char *addr = (char *)(event + pred->offset); + int cmp, match; + + cmp = strncmp(addr, pred->str_val, pred->str_len); + + match = (!cmp) ^ pred->not; + + return match; +} + +/* return 1 if event matches, 0 otherwise (discard) */ +int filter_match_preds(struct ftrace_event_call *call, void *rec) +{ + int i, matched, and_failed = 0; + struct filter_pred *pred; + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (call->preds[i]) { + pred = call->preds[i]; + if (and_failed && !pred->or) + continue; + matched = pred->fn(pred, rec); + if (!matched && !pred->or) { + and_failed = 1; + continue; + } else if (matched && pred->or) + return 1; + } else + break; + } + + if (and_failed) + return 0; + + return 1; +} + +int 
filter_print_preds(struct filter_pred **preds, char *buf) +{ + ssize_t this_len = 0; + char *field_name; + struct filter_pred *pred; + int i; + + if (!preds) { + this_len += sprintf(buf + this_len, "none\n"); + return this_len; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (preds[i]) { + pred = preds[i]; + field_name = pred->field_name; + if (i) + this_len += sprintf(buf + this_len, + pred->or ? "|| " : "&& "); + this_len += sprintf(buf + this_len, + "%s ", field_name); + this_len += sprintf(buf + this_len, + pred->not ? "!= " : "== "); + if (pred->str_val) + this_len += sprintf(buf + this_len, + "%s\n", pred->str_val); + else + this_len += sprintf(buf + this_len, + "%llu\n", pred->val); + } else + break; + } + + return this_len; +} + +static struct ftrace_event_field * +find_event_field(struct ftrace_event_call *call, char *name) +{ + struct ftrace_event_field *field; + struct list_head *entry, *tmp; + + list_for_each_safe(entry, tmp, &call->fields) { + field = list_entry(entry, struct ftrace_event_field, link); + if (!strcmp(field->name, name)) + return field; + } + + return NULL; +} + +void filter_free_pred(struct filter_pred *pred) +{ + if (!pred) + return; + + kfree(pred->field_name); + kfree(pred->str_val); + kfree(pred); +} + +void filter_free_preds(struct ftrace_event_call *call) +{ + int i; + + if (call->preds) { + for (i = 0; i < MAX_FILTER_PRED; i++) + filter_free_pred(call->preds[i]); + kfree(call->preds); + call->preds = NULL; + } +} + +static int __filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred) +{ + int i; + + if (call->preds && !pred->compound) + filter_free_preds(call); + + if (!call->preds) { + call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + GFP_KERNEL); + if (!call->preds) + return -ENOMEM; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (!call->preds[i]) { + call->preds[i] = pred; + return 0; + } + } + + return -ENOMEM; +} + +static int is_string_field(const char *type) +{ + if (strchr(type, '[') 
&& strstr(type, "char")) + return 1; + + return 0; +} + +int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) +{ + struct ftrace_event_field *field; + + field = find_event_field(call, pred->field_name); + if (!field) + return -EINVAL; + + pred->offset = field->offset; + + if (is_string_field(field->type)) { + pred->fn = filter_pred_string; + pred->str_len = field->size; + return __filter_add_pred(call, pred); + } + + switch (field->size) { + case 8: + pred->fn = filter_pred_64; + break; + case 4: + pred->fn = filter_pred_32; + break; + case 2: + pred->fn = filter_pred_16; + break; + case 1: + pred->fn = filter_pred_8; + break; + default: + return -EINVAL; + } + + return __filter_add_pred(call, pred); +} + +int filter_parse(char **pbuf, struct filter_pred *pred) +{ + char *tmp, *tok, *val_str = NULL; + int tok_n = 0; + + /* field ==/!= number, or/and field ==/!= number, number */ + while ((tok = strsep(pbuf, " \n"))) { + if (tok_n == 0) { + if (!strcmp(tok, "0")) { + pred->clear = 1; + return 0; + } else if (!strcmp(tok, "&&")) { + pred->or = 0; + pred->compound = 1; + } else if (!strcmp(tok, "||")) { + pred->or = 1; + pred->compound = 1; + } else + pred->field_name = tok; + tok_n = 1; + continue; + } + if (tok_n == 1) { + if (!pred->field_name) + pred->field_name = tok; + else if (!strcmp(tok, "!=")) + pred->not = 1; + else if (!strcmp(tok, "==")) + pred->not = 0; + else { + pred->field_name = NULL; + return -EINVAL; + } + tok_n = 2; + continue; + } + if (tok_n == 2) { + if (pred->compound) { + if (!strcmp(tok, "!=")) + pred->not = 1; + else if (!strcmp(tok, "==")) + pred->not = 0; + else { + pred->field_name = NULL; + return -EINVAL; + } + } else { + val_str = tok; + break; /* done */ + } + tok_n = 3; + continue; + } + if (tok_n == 3) { + val_str = tok; + break; /* done */ + } + } + + pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); + if (!pred->field_name) + return -ENOMEM; + + pred->val = simple_strtoull(val_str, &tmp, 10); + if 
(tmp == val_str) { + pred->str_val = kstrdup(val_str, GFP_KERNEL); + if (!pred->str_val) + return -ENOMEM; + } + + return 0; +} + + diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 468938f7014..ebf215e87d5 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -204,6 +204,7 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ + struct ftrace_event_call *call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ @@ -222,6 +223,9 @@ static void ftrace_raw_event_##call(proto) \ assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ + \ + if (call->preds && !filter_match_preds(call, entry)) \ + ring_buffer_event_discard(event); \ } \ \ static int ftrace_raw_reg_event_##call(void) \ -- cgit v1.2.3-70-g09d2 From cfb180f3e71b2a280a254c8646a9ab1beab63f84 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:17 -0500 Subject: tracing: add per-subsystem filtering This patch adds per-subsystem filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each subsystem directory. This file can be written to to set filters; reading from it will display the current set of filters set for that subsystem. Basically what it does is propagate the filter down to each event contained in the subsystem. If a particular event doesn't have a field with the name specified in the filter, it simply doesn't get set for that event. You can verify whether or not the filter was set for a particular event by looking at the filter file for that event. As with per-event filters, compound expressions are supported, echoing '0' to the subsystem's filter file clears all filters in the subsystem, etc. 
Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710677.7703.49.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 15 +++++++ kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++++--- kernel/trace/trace_events_filter.c | 80 +++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d9eb39e4bb3..f267723c3c5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -804,6 +804,18 @@ struct ftrace_event_call { #endif }; +struct event_subsystem { + struct list_head list; + const char *name; + struct dentry *entry; + struct filter_pred **preds; +}; + +#define events_for_each(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + #define MAX_FILTER_PRED 8 struct filter_pred; @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_free_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern void filter_free_subsystem_preds(struct event_subsystem *system); +extern int filter_add_subsystem_pred(struct event_subsystem *system, + struct filter_pred *pred); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97470c48956..97d4daaddd9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct event_subsystem *system = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s 
= kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + r = filter_print_preds(system->preds, s->buffer); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + + kfree(s); + + return r; +} + +static ssize_t +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct event_subsystem *system = filp->private_data; + char buf[64], *pbuf = buf; + struct filter_pred *pred; + int err; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return -ENOMEM; + + err = filter_parse(&pbuf, pred); + if (err < 0) { + filter_free_pred(pred); + return err; + } + + if (pred->clear) { + filter_free_subsystem_preds(system); + return cnt; + } + + if (filter_add_subsystem_pred(system, pred)) { + filter_free_pred(pred); + return -EINVAL; + } + + *ppos += cnt; + + return cnt; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = { .write = event_filter_write, }; +static const struct file_operations ftrace_subsystem_filter_fops = { + .open = tracing_open_generic, + .read = subsystem_filter_read, + .write = subsystem_filter_write, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void) return d_events; } -struct event_subsystem { - struct list_head list; - const char *name; - struct dentry *entry; -}; - static LIST_HEAD(event_subsystems); static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; + struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry 
*d_events) system->name = name; list_add(&system->list, &event_subsystems); + system->preds = NULL; + + entry = debugfs_create_file("filter", 0444, system->entry, system, + &ftrace_subsystem_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", name); + return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8e8c5fa25be..1ab20cee0e4 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call) } } +void filter_free_subsystem_preds(struct event_subsystem *system) +{ + struct ftrace_event_call *call = __start_ftrace_events; + int i; + + if (system->preds) { + for (i = 0; i < MAX_FILTER_PRED; i++) + filter_free_pred(system->preds[i]); + kfree(system->preds); + system->preds = NULL; + } + + events_for_each(call) { + if (!call->name || !call->regfunc) + continue; + + if (!strcmp(call->system, system->name)) + filter_free_preds(call); + } +} + static int __filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) { @@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) return __filter_add_pred(call, pred); } +static struct filter_pred *copy_pred(struct filter_pred *pred) +{ + struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); + if (!new_pred) + return NULL; + + memcpy(new_pred, pred, sizeof(*pred)); + if (pred->str_val) { + new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); + new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); + if (!new_pred->str_val) { + kfree(new_pred); + return NULL; + } + } + + return new_pred; +} + +int filter_add_subsystem_pred(struct event_subsystem *system, + struct filter_pred *pred) +{ + struct ftrace_event_call *call = __start_ftrace_events; + struct filter_pred *event_pred; + int i; + + if (system->preds && !pred->compound) + 
filter_free_subsystem_preds(system); + + if (!system->preds) { + system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + GFP_KERNEL); + if (!system->preds) + return -ENOMEM; + } + + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (!system->preds[i]) { + system->preds[i] = pred; + break; + } + if (i == MAX_FILTER_PRED - 1) + return -EINVAL; + } + + events_for_each(call) { + if (!call->name || !call->regfunc) + continue; + + if (!strcmp(call->system, system->name)) { + event_pred = copy_pred(pred); + if (event_pred) + filter_add_pred(call, event_pred); + } + } + + return 0; +} + int filter_parse(char **pbuf, struct filter_pred *pred) { char *tmp, *tok, *val_str = NULL; -- cgit v1.2.3-70-g09d2 From 07edf7121374609709ef1b0889f6e7b8d6a62ec1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:46 +0100 Subject: tracing/events: don't use wake up for events Impact: fix hard-lockup with sched switch events Some ftrace events, such as sched wakeup, can be traced while the runqueue lock is held. Since they are using trace_current_buffer_unlock_commit(), they call wake_up() which can try to grab the runqueue lock too, resulting in a deadlock. Now for all events, we call a new helper: trace_nowake_buffer_unlock_commit() which does pretty much the same as trace_current_buffer_unlock_commit() except that it doesn't call trace_wake_up().
Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1237759847-21025-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 26 +++++++++++++++++++++----- kernel/trace/trace.h | 2 ++ kernel/trace/trace_events_stage_3.h | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e6fac0ffe6f..6bad12819eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -860,15 +860,25 @@ static void ftrace_trace_stack(struct trace_array *tr, static void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer_event *event, - unsigned long flags, int pc) +static inline void __trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc, + int wake) { ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); - trace_wake_up(); + + if (wake) + trace_wake_up(); +} + +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + __trace_buffer_unlock_commit(tr, event, flags, pc, 1); } struct ring_buffer_event * @@ -882,7 +892,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return trace_buffer_unlock_commit(&global_trace, event, flags, pc); + return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); +} + +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); } void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f267723c3c5..54fd9bcd0a6 
100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index ebf215e87d5..9a3bd49b52e 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -222,7 +222,7 @@ static void ftrace_raw_event_##call(proto) \ \ assign; \ \ - trace_current_buffer_unlock_commit(event, irq_flags, pc); \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ \ if (call->preds && !filter_match_preds(call, entry)) \ ring_buffer_event_discard(event); \ -- cgit v1.2.3-70-g09d2 From 4bda2d517bfa3ce3d7044e06988cdddae7adffe2 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 24 Mar 2009 02:14:31 -0500 Subject: tracing/filters: use trace_seq_printf() to print filters Impact: cleanup Instead of just using the trace_seq buffer to print the filters, use trace_seq_printf() as it was intended to be used. 
Reported-by: Steven Rostedt Signed-off-by: Tom Zanussi Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237878871.8339.59.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 3 ++- kernel/trace/trace_events.c | 8 ++++---- kernel/trace/trace_events_filter.c | 25 +++++++++---------------- 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54fd9bcd0a6..90a848debcb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -840,7 +840,8 @@ struct filter_pred { int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); extern void filter_free_pred(struct filter_pred *pred); -extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern void filter_print_preds(struct filter_pred **preds, + struct trace_seq *s); extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a9381384aa9..d132997ab75 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -481,8 +481,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - r = filter_print_preds(call->preds, s->buffer); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + filter_print_preds(call->preds, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -547,8 +547,8 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - r = filter_print_preds(system->preds, s->buffer); - r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r); + filter_print_preds(system->preds, s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c 
index 3f0b79f8a4b..9fca8bb1c06 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -24,6 +24,7 @@ #include #include "trace.h" +#include "trace_output.h" static int filter_pred_64(struct filter_pred *pred, void *event) { @@ -108,16 +109,15 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) return 1; } -int filter_print_preds(struct filter_pred **preds, char *buf) +void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) { - ssize_t this_len = 0; char *field_name; struct filter_pred *pred; int i; if (!preds) { - this_len += sprintf(buf + this_len, "none\n"); - return this_len; + trace_seq_printf(s, "none\n"); + return; } for (i = 0; i < MAX_FILTER_PRED; i++) { @@ -125,23 +125,16 @@ int filter_print_preds(struct filter_pred **preds, char *buf) pred = preds[i]; field_name = pred->field_name; if (i) - this_len += sprintf(buf + this_len, - pred->or ? "|| " : "&& "); - this_len += sprintf(buf + this_len, - "%s ", field_name); - this_len += sprintf(buf + this_len, - pred->not ? "!= " : "== "); + trace_seq_printf(s, pred->or ? "|| " : "&& "); + trace_seq_printf(s, "%s ", field_name); + trace_seq_printf(s, pred->not ? "!= " : "== "); if (pred->str_val) - this_len += sprintf(buf + this_len, - "%s\n", pred->str_val); + trace_seq_printf(s, "%s\n", pred->str_val); else - this_len += sprintf(buf + this_len, - "%llu\n", pred->val); + trace_seq_printf(s, "%llu\n", pred->val); } else break; } - - return this_len; } static struct ftrace_event_field * -- cgit v1.2.3-70-g09d2 From be6f164a02f394675e2ac2077dd354cebef5b4c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 11:06:24 -0400 Subject: function-graph: add option for include sleep times Impact: give user a choice to show times spent while sleeping The user may want to see the time a function spent sleeping. This patch adds the trace option "sleep-time" to allow that. The "sleep-time" option is default on. 
echo sleep-time > /debug/tracing/trace_options produces: ------------------------------------------ 2) avahi-d-3428 => -0 ------------------------------------------ 2) | finish_task_switch() { 2) 0.621 us | _spin_unlock_irq(); 2) 2.202 us | } 2) ! 1002.197 us | } 2) ! 1003.521 us | } whereas, echo nosleep-time > /debug/tracing/trace_options produces: 0) -0 => yum-upd-3416 ------------------------------------------ 0) | finish_task_switch() { 0) 0.643 us | _spin_unlock_irq(); 0) 2.342 us | } 0) + 41.302 us | } 0) + 42.453 us | } Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 7 +++++++ kernel/trace/trace.c | 3 ++- kernel/trace/trace.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0b90364d1a2..02d2de9d08b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2599,6 +2599,13 @@ ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, unsigned long long timestamp; int index; + /* + * Does the user want to count the time a function was asleep. + * If so, do not update the time stamps. 
+ */ + if (trace_flags & TRACE_ITER_SLEEP_TIME) + return; + timestamp = trace_clock_local(); prev->ftrace_timestamp = timestamp; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f0e1337b1eb..67c6a21dd42 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -316,6 +316,7 @@ static const char *trace_options[] = { "context-info", "latency-format", "global-clock", + "sleep-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be20..d7410bbb9a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -683,6 +683,7 @@ enum trace_iterator_flags { TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, + TRACE_ITER_SLEEP_TIME = 0x100000, }; /* -- cgit v1.2.3-70-g09d2 From ca2b84cb3c4a0d4d2143b46ec072cdff5d1b3b87 Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 23 Mar 2009 15:12:24 +0200 Subject: kmemtrace: use tracepoints kmemtrace now uses tracepoints instead of markers. We no longer need to use format specifiers to pass arguments. Signed-off-by: Eduard - Gabriel Munteanu [ folded: Use the new TP_PROTO and TP_ARGS to fix the build. ] [ folded: fix build when CONFIG_KMEMTRACE is disabled. ] [ folded: define tracepoints when CONFIG_TRACEPOINTS is enabled. 
] Signed-off-by: Pekka Enberg LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/slab_def.h | 10 +-- include/linux/slub_def.h | 12 +-- include/trace/kmemtrace.h | 92 +++++++++------------ kernel/trace/kmemtrace.c | 206 ++++++++++++++++++++++++++++++++-------------- kernel/trace/trace.h | 6 ++ mm/slab.c | 24 +++--- mm/slob.c | 28 +++---- mm/slub.c | 30 +++---- mm/util.c | 16 ++++ 9 files changed, 251 insertions(+), 173 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index f4523651fa4..5ac9b0bcaf9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -73,8 +73,8 @@ found: ret = kmem_cache_alloc_notrace(cachep, flags); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, - size, slab_buffer_size(cachep), flags); + trace_kmalloc(_THIS_IP_, ret, + size, slab_buffer_size(cachep), flags); return ret; } @@ -128,9 +128,9 @@ found: ret = kmem_cache_alloc_node_notrace(cachep, flags, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, - ret, size, slab_buffer_size(cachep), - flags, node); + trace_kmalloc_node(_THIS_IP_, ret, + size, slab_buffer_size(cachep), + flags, node); return ret; } diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a1f90528e70..5046f90c117 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -233,8 +233,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) unsigned int order = get_order(size); void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret, - size, PAGE_SIZE << order, flags); + trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags); return ret; } @@ -255,9 +254,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) ret = kmem_cache_alloc_notrace(s, flags); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, - _THIS_IP_, ret, - size, s->size, flags); + trace_kmalloc(_THIS_IP_, ret, 
size, s->size, flags); return ret; } @@ -296,9 +293,8 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) ret = kmem_cache_alloc_node_notrace(s, flags, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - _THIS_IP_, ret, - size, s->size, flags, node); + trace_kmalloc_node(_THIS_IP_, ret, + size, s->size, flags, node); return ret; } diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h index ad8b7857855..28ee69f9cd4 100644 --- a/include/trace/kmemtrace.h +++ b/include/trace/kmemtrace.h @@ -9,65 +9,53 @@ #ifdef __KERNEL__ +#include #include -#include - -enum kmemtrace_type_id { - KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ - KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ - KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ -}; #ifdef CONFIG_KMEMTRACE - extern void kmemtrace_init(void); - -extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node); - -extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr); - -#else /* CONFIG_KMEMTRACE */ - +#else static inline void kmemtrace_init(void) { } - -static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ -} - -static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ -} - -#endif /* CONFIG_KMEMTRACE */ - -static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags) -{ - kmemtrace_mark_alloc_node(type_id, call_site, ptr, - bytes_req, bytes_alloc, gfp_flags, -1); -} +#endif + +DECLARE_TRACE(kmalloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t 
bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmem_cache_alloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmalloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kmem_cache_alloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kfree, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); +DECLARE_TRACE(kmem_cache_free, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); #endif /* __KERNEL__ */ diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index ae201b3eda8..4f7b5db5dd0 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "trace.h" @@ -29,10 +30,150 @@ static struct tracer_flags kmem_tracer_flags = { .opts = kmem_opts }; - -static bool kmem_tracing_enabled __read_mostly; static struct trace_array *kmemtrace_array; +/* Trace allocations */ +static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + struct ring_buffer_event *event; + struct kmemtrace_alloc_entry *entry; + struct trace_array *tr = kmemtrace_array; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + 
entry->ent.type = TRACE_KMEM_ALLOC; + entry->call_site = call_site; + entry->ptr = ptr; + entry->bytes_req = bytes_req; + entry->bytes_alloc = bytes_alloc; + entry->gfp_flags = gfp_flags; + entry->node = node; + + ring_buffer_unlock_commit(tr->buffer, event); + + trace_wake_up(); +} + +static inline void kmemtrace_free(enum kmemtrace_type_id type_id, + unsigned long call_site, + const void *ptr) +{ + struct ring_buffer_event *event; + struct kmemtrace_free_entry *entry; + struct trace_array *tr = kmemtrace_array; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + + entry->ent.type = TRACE_KMEM_FREE; + entry->type_id = type_id; + entry->call_site = call_site; + entry->ptr = ptr; + + ring_buffer_unlock_commit(tr->buffer, event); + + trace_wake_up(); +} + +static void kmemtrace_kmalloc(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +static void kmemtrace_kmem_cache_alloc(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags) +{ + kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, -1); +} + +static void kmemtrace_kmalloc_node(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, node); +} + +static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node) +{ + kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, + bytes_req, bytes_alloc, gfp_flags, node); +} + +static void kmemtrace_kfree(unsigned long call_site, const void *ptr) +{ + 
kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); +} + +static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) +{ + kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); +} + +static int kmemtrace_start_probes(void) +{ + int err; + + err = register_trace_kmalloc(kmemtrace_kmalloc); + if (err) + return err; + err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); + if (err) + return err; + err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); + if (err) + return err; + err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); + if (err) + return err; + err = register_trace_kfree(kmemtrace_kfree); + if (err) + return err; + err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); + + return err; +} + +static void kmemtrace_stop_probes(void) +{ + unregister_trace_kmalloc(kmemtrace_kmalloc); + unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); + unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); + unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); + unregister_trace_kfree(kmemtrace_kfree); + unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); +} + static int kmem_trace_init(struct trace_array *tr) { int cpu; @@ -41,14 +182,14 @@ static int kmem_trace_init(struct trace_array *tr) for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); - kmem_tracing_enabled = true; + kmemtrace_start_probes(); return 0; } static void kmem_trace_reset(struct trace_array *tr) { - kmem_tracing_enabled = false; + kmemtrace_stop_probes(); } static void kmemtrace_headers(struct seq_file *s) @@ -260,63 +401,6 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) } } -/* Trace allocations */ -void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node) -{ - struct ring_buffer_event *event; - struct kmemtrace_alloc_entry 
*entry; - struct trace_array *tr = kmemtrace_array; - - if (!kmem_tracing_enabled) - return; - - event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC, - sizeof(*entry), 0, 0); - if (!event) - return; - entry = ring_buffer_event_data(event); - - entry->call_site = call_site; - entry->ptr = ptr; - entry->bytes_req = bytes_req; - entry->bytes_alloc = bytes_alloc; - entry->gfp_flags = gfp_flags; - entry->node = node; - - trace_buffer_unlock_commit(tr, event, 0, 0); -} -EXPORT_SYMBOL(kmemtrace_mark_alloc_node); - -void kmemtrace_mark_free(enum kmemtrace_type_id type_id, - unsigned long call_site, - const void *ptr) -{ - struct ring_buffer_event *event; - struct kmemtrace_free_entry *entry; - struct trace_array *tr = kmemtrace_array; - - if (!kmem_tracing_enabled) - return; - - event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE, - sizeof(*entry), 0, 0); - if (!event) - return; - entry = ring_buffer_event_data(event); - entry->type_id = type_id; - entry->call_site = call_site; - entry->ptr = ptr; - - trace_buffer_unlock_commit(tr, event, 0, 0); -} -EXPORT_SYMBOL(kmemtrace_mark_free); - static struct tracer kmem_tracer __read_mostly = { .name = "kmemtrace", .init = kmem_trace_init, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d..cbc168f1e43 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -182,6 +182,12 @@ struct trace_power { struct power_trace state_data; }; +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. 
*/ +}; + struct kmemtrace_alloc_entry { struct trace_entry ent; enum kmemtrace_type_id type_id; diff --git a/mm/slab.c b/mm/slab.c index 9ec66c3e6ee..fa00fd6a644 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3565,8 +3565,8 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - obj_size(cachep), cachep->buffer_size, flags); + trace_kmem_cache_alloc(_RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, flags); return ret; } @@ -3627,9 +3627,9 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) void *ret = __cache_alloc_node(cachep, flags, nodeid, __builtin_return_address(0)); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - obj_size(cachep), cachep->buffer_size, - flags, nodeid); + trace_kmem_cache_alloc_node(_RET_IP_, ret, + obj_size(cachep), cachep->buffer_size, + flags, nodeid); return ret; } @@ -3657,9 +3657,8 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) return cachep; ret = kmem_cache_alloc_node_notrace(cachep, flags, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - (unsigned long) caller, ret, - size, cachep->buffer_size, flags, node); + trace_kmalloc_node((unsigned long) caller, ret, + size, cachep->buffer_size, flags, node); return ret; } @@ -3709,9 +3708,8 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, return cachep; ret = __cache_alloc(cachep, flags, caller); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, - (unsigned long) caller, ret, - size, cachep->buffer_size, flags); + trace_kmalloc((unsigned long) caller, ret, + size, cachep->buffer_size, flags); return ret; } @@ -3757,7 +3755,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) __cache_free(cachep, objp); local_irq_restore(flags); - kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp); + trace_kmem_cache_free(_RET_IP_, objp); } 
EXPORT_SYMBOL(kmem_cache_free); @@ -3785,7 +3783,7 @@ void kfree(const void *objp) __cache_free(c, (void *)objp); local_irq_restore(flags); - kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp); + trace_kfree(_RET_IP_, objp); } EXPORT_SYMBOL(kfree); diff --git a/mm/slob.c b/mm/slob.c index 4dd6516447f..00003587ebf 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -490,9 +490,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) *m = size; ret = (void *)m + align; - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - _RET_IP_, ret, - size, size + align, gfp, node); + trace_kmalloc_node(_RET_IP_, ret, + size, size + align, gfp, node); } else { unsigned int order = get_order(size); @@ -503,9 +502,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) page->private = size; } - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - _RET_IP_, ret, - size, PAGE_SIZE << order, gfp, node); + trace_kmalloc_node(_RET_IP_, ret, + size, PAGE_SIZE << order, gfp, node); } return ret; @@ -527,7 +525,7 @@ void kfree(const void *block) } else put_page(&sp->page); - kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block); + trace_kfree(_RET_IP_, block); } EXPORT_SYMBOL(kfree); @@ -599,16 +597,14 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) if (c->size < PAGE_SIZE) { b = slob_alloc(c->size, flags, c->align, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, - _RET_IP_, b, c->size, - SLOB_UNITS(c->size) * SLOB_UNIT, - flags, node); + trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, + SLOB_UNITS(c->size) * SLOB_UNIT, + flags, node); } else { b = slob_new_pages(flags, get_order(c->size), node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, - _RET_IP_, b, c->size, - PAGE_SIZE << get_order(c->size), - flags, node); + trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, + PAGE_SIZE << get_order(c->size), + flags, node); } if (c->ctor) @@ -646,7 +642,7 @@ void kmem_cache_free(struct kmem_cache *c, void *b) __kmem_cache_free(b, c->size); } - 
kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b); + trace_kmem_cache_free(_RET_IP_, b); } EXPORT_SYMBOL(kmem_cache_free); diff --git a/mm/slub.c b/mm/slub.c index 7aaa121d0ea..a98078bf738 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1621,8 +1621,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - s->objsize, s->size, gfpflags); + trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); return ret; } @@ -1641,8 +1640,8 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret, - s->objsize, s->size, gfpflags, node); + trace_kmem_cache_alloc_node(_RET_IP_, ret, + s->objsize, s->size, gfpflags, node); return ret; } @@ -1767,7 +1766,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) slab_free(s, page, x, _RET_IP_); - kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x); + trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); @@ -2702,8 +2701,7 @@ void *__kmalloc(size_t size, gfp_t flags) ret = slab_alloc(s, flags, -1, _RET_IP_); - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, - size, s->size, flags); + trace_kmalloc(_RET_IP_, ret, size, s->size, flags); return ret; } @@ -2729,10 +2727,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) if (unlikely(size > SLUB_MAX_SIZE)) { ret = kmalloc_large_node(size, flags, node); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, - _RET_IP_, ret, - size, PAGE_SIZE << get_order(size), - flags, node); + trace_kmalloc_node(_RET_IP_, ret, + size, PAGE_SIZE << get_order(size), + flags, node); return ret; } @@ -2744,8 +2741,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) ret = slab_alloc(s, flags, node, _RET_IP_); - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret, - size, 
s->size, flags, node); + trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); return ret; } @@ -2807,7 +2803,7 @@ void kfree(const void *x) } slab_free(page->slab, page, object, _RET_IP_); - kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x); + trace_kfree(_RET_IP_, x); } EXPORT_SYMBOL(kfree); @@ -3290,8 +3286,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) ret = slab_alloc(s, gfpflags, -1, caller); /* Honor the call site pointer we recieved. */ - kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size, - s->size, gfpflags); + trace_kmalloc(caller, ret, size, s->size, gfpflags); return ret; } @@ -3313,8 +3308,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, ret = slab_alloc(s, gfpflags, node, caller); /* Honor the call site pointer we recieved. */ - kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret, - size, s->size, gfpflags, node); + trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); return ret; } diff --git a/mm/util.c b/mm/util.c index 7c122e49f76..2599e83eea1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,6 +4,7 @@ #include #include #include +#include #include /** @@ -236,3 +237,18 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, return ret; } EXPORT_SYMBOL_GPL(get_user_pages_fast); + +/* Tracepoints definitions. 
*/ +DEFINE_TRACE(kmalloc); +DEFINE_TRACE(kmem_cache_alloc); +DEFINE_TRACE(kmalloc_node); +DEFINE_TRACE(kmem_cache_alloc_node); +DEFINE_TRACE(kfree); +DEFINE_TRACE(kmem_cache_free); + +EXPORT_TRACEPOINT_SYMBOL(kmalloc); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); +EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); +EXPORT_TRACEPOINT_SYMBOL(kfree); +EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); -- cgit v1.2.3-70-g09d2 From cf8e3474654f20433aab9aa35826d43b5f245008 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 30 Mar 2009 13:48:00 +0800 Subject: tracing: fix incorrect return type of ns2usecs() Impact: fix time output bug on 32-bit systems ns2usecs() returns 'long', which is incorrect. (In i386) ... -0 [000] 521.442100: _spin_lock <-tick_do_update_jiffies64 -0 [000] 521.442101: do_timer <-tick_do_update_jiffies64 -0 [000] 521.442102: update_wall_time <-do_timer -0 [000] 521.442102: update_xtime_cache <-update_wall_time .... (It always prints a time of less than 2200 seconds, besides ...) Because 'long' is 32 bits on i386. ( (1<<31) useconds is about 2200 seconds) ... -0 [001] 4154502640.134759: rcu_bh_qsctr_inc <-__do_softirq -0 [001] 4154502640.134760: _local_bh_enable <-__do_softirq -0 [001] 4154502640.134761: idle_cpu <-irq_exit ... (very large value) Because 'long' is a signed type and it is 32 bits on i386. 
Changes in v2: return 'unsigned long long' instead of 'cycle_t' Signed-off-by: Lai Jiangshan LKML-Reference: <49D05D10.4030009@cn.fujitsu.com> Reported-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 +-- kernel/trace/trace.h | 2 +- kernel/trace/trace_output.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c56..457dd8c97e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -147,8 +147,7 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); -long -ns2usecs(cycle_t nsec) +unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; do_div(nsec, 1000); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d..0d81a4a2a4a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -596,7 +596,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern long ns2usecs(cycle_t nsec); +extern unsigned long long ns2usecs(cycle_t nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b24..64b54a59c55 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx, -- cgit v1.2.3-70-g09d2