From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Mar 2009 12:50:56 -0400 Subject: tracing: add function profiler Impact: new profiling feature This patch adds a function profiler. In debugfs/tracing/ two new files are created. function_profile_enabled - to enable or disable profiling trace_stat/functions - the profiled functions. For example: echo 1 > /debugfs/tracing/function_profile_enabled ./hackbench 50 echo 0 > /debugfs/tracing/function_profile_enabled yields: cat /debugfs/tracing/trace_stat/functions Function Hit -------- --- _spin_lock 10106442 _spin_unlock 10097492 kfree 6013704 _spin_unlock_irqrestore 4423941 _spin_lock_irqsave 4406825 __phys_addr 4181686 __slab_free 4038222 dput 4030130 path_put 4023387 unroll_tree_refs 4019532 [...] The most hit functions are listed first. Functions that are not hit are not listed. This feature depends on and uses dynamic function tracing. When the function profiling is disabled, no overhead occurs. But it still takes up around 300KB to hold the data, thus it is not recomended to keep it enabled for systems low on memory. When a '1' is echoed into the function_profile_enabled file, the counters for is function is reset back to zero. Thus you can see what functions are hit most by different programs. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 313 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 311 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7b8722baf15..11f364c776d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,6 +34,7 @@ #include #include "trace.h" +#include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ do { \ @@ -261,7 +262,6 @@ struct ftrace_func_probe { struct rcu_head rcu; }; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -309,6 +309,307 @@ static struct dyn_ftrace *ftrace_free_records; } \ } +#ifdef CONFIG_FUNCTION_PROFILER +static struct hlist_head *ftrace_profile_hash; +static int ftrace_profile_bits; +static int ftrace_profile_enabled; +static DEFINE_MUTEX(ftrace_profile_lock); + +static void * +function_stat_next(void *v, int idx) +{ + struct dyn_ftrace *rec = v; + struct ftrace_page *pg; + + pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + + again: + rec++; + if ((void *)rec >= (void *)&pg->records[pg->index]) { + pg = pg->next; + if (!pg) + return NULL; + rec = &pg->records[0]; + } + + if (rec->flags & FTRACE_FL_FREE || + rec->flags & FTRACE_FL_FAILED || + !(rec->flags & FTRACE_FL_CONVERTED) || + /* ignore non hit functions */ + !rec->counter) + goto again; + + return rec; +} + +static void *function_stat_start(struct tracer_stat *trace) +{ + return function_stat_next(&ftrace_pages_start->records[0], 0); +} + +static int function_stat_cmp(void *p1, void *p2) +{ + struct dyn_ftrace *a = p1; + struct dyn_ftrace *b = p2; + + if (a->counter < b->counter) + return -1; + if (a->counter > b->counter) + return 1; + else + return 0; +} + +static int function_stat_headers(struct seq_file *m) +{ + seq_printf(m, " Function Hit\n" + " -------- ---\n"); + return 0; +} + +static int function_stat_show(struct seq_file *m, void *v) +{ + struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + + seq_printf(m, " %-30.30s %10lu\n", str, rec->counter); + return 0; +} + +static struct tracer_stat function_stats = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + +static void ftrace_profile_init(int nr_funcs) +{ + unsigned long addr; + int order; + int size; + + /* + * We are profiling all functions, lets make it 1/4th of the + * number of functions that are in core kernel. So we have to + * iterate 4 times. + */ + order = (sizeof(struct hlist_head) * nr_funcs) / 4; + order = get_order(order); + size = 1 << (PAGE_SHIFT + order); + + pr_info("Allocating %d KB for profiler hash\n", size >> 10); + + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!addr) { + pr_warning("Could not allocate function profiler hash\n"); + return; + } + + ftrace_profile_hash = (void *)addr; + + /* + * struct hlist_head should be a pointer of 4 or 8 bytes. + * And a simple bit manipulation can be done, but if for + * some reason struct hlist_head is not a mulitple of 2, + * then we play it safe, and simply count. This function + * is done once at boot up, so it is not that critical in + * performance. + */ + + size--; + size /= sizeof(struct hlist_head); + + for (; size; size >>= 1) + ftrace_profile_bits++; + + pr_info("Function profiler has %d hash buckets\n", + 1 << ftrace_profile_bits); + + return; +} + +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void ftrace_profile_reset(void) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + rec->counter = 0; + } while_for_each_ftrace_rec(); +} + +static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +{ + struct dyn_ftrace *rec; + struct hlist_head *hhd; + struct hlist_node *n; + unsigned long flags; + unsigned long key; + + if (!ftrace_profile_hash) + return NULL; + + key = hash_long(ip, ftrace_profile_bits); + hhd = &ftrace_profile_hash[key]; + + if (hlist_empty(hhd)) + return NULL; + + local_irq_save(flags); + hlist_for_each_entry_rcu(rec, n, hhd, node) { + if (rec->ip == ip) + goto out; + } + rec = NULL; + out: + local_irq_restore(flags); + + return rec; +} + +static void +function_profile_call(unsigned long ip, unsigned long parent_ip) +{ + struct dyn_ftrace *rec; + unsigned long flags; + + if (!ftrace_profile_enabled) + return; + + local_irq_save(flags); + rec = ftrace_find_profiled_func(ip); + if (!rec) + goto out; + + rec->counter++; + out: + local_irq_restore(flags); +} + +static struct ftrace_ops ftrace_profile_ops __read_mostly = +{ + .func = function_profile_call, +}; + +static ssize_t +ftrace_profile_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; + int ret; + + if (!ftrace_profile_hash) { + pr_info("Can not enable hash due to earlier problems\n"); + return -ENODEV; + } + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + + mutex_lock(&ftrace_profile_lock); + if (ftrace_profile_enabled ^ val) { + if (val) { + ftrace_profile_reset(); + register_ftrace_function(&ftrace_profile_ops); + ftrace_profile_enabled = 1; + } else { + ftrace_profile_enabled = 0; + unregister_ftrace_function(&ftrace_profile_ops); + } + } + mutex_unlock(&ftrace_profile_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static const struct file_operations ftrace_profile_fops = { + .open = tracing_open_generic, + .read = ftrace_profile_read, + .write = ftrace_profile_write, +}; + +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + int ret; + + ret = register_stat_tracer(&function_stats); + if (ret) { + pr_warning("Warning: could not register " + "function stats\n"); + return; + } + + entry = debugfs_create_file("function_profile_enabled", 0644, + d_tracer, NULL, &ftrace_profile_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'function_profile_enabled' entry\n"); +} + +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ + unsigned long key; + + if (!ftrace_profile_hash) + return; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ + mutex_lock(&ftrace_profile_lock); + hlist_del(&rec->node); + mutex_unlock(&ftrace_profile_lock); +} + +#else /* CONFIG_FUNCTION_PROFILER */ +static void ftrace_profile_init(int nr_funcs) +{ +} +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ +} +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ +} +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ +} +#endif /* CONFIG_FUNCTION_PROFILER */ + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -359,8 +660,10 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) + !(rec->flags & FTRACE_FL_FREE)) { ftrace_free_rec(rec); + ftrace_profile_release(rec); + } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -414,6 +717,8 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; + ftrace_add_profile(rec); + return rec; } @@ -2157,6 +2462,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + ftrace_profile_debugfs(d_tracer); + return 0; } @@ -2225,6 +2532,8 @@ void __init ftrace_init(void) if (ret) goto failed; + ftrace_profile_init(count); + last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, -- cgit v1.2.3-70-g09d2 From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 17:12:36 -0400 Subject: tracing: move function profiler data out of function struct Impact: reduce size of memory in function profiler The function profiler originally introduces its counters into the function records itself. There is 20 thousand different functions on a normal system, and that is adding 20 thousand counters for profiling event when not needed. A normal run of the profiler yields only a couple of thousand functions executed, depending on what is being profiled. This means we have around 18 thousand useless counters. This patch rectifies this by moving the data out of the function records used by dynamic ftrace. Data is preallocated to hold the functions when the profiling begins. Checks are made during profiling to see if more recorcds should be allocated, and they are allocated if it is safe to do so. This also removes the dependency from using dynamic ftrace, and also removes the overhead by having it enabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 - kernel/trace/Kconfig | 10 +- kernel/trace/ftrace.c | 440 +++++++++++++++++++++++++++++-------------------- 3 files changed, 263 insertions(+), 191 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0456c3a51c6..015a3d22cf7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,10 +153,6 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long counter; - struct hlist_node node; -#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 95e9ad5735d..8a4136096d7 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -379,20 +379,16 @@ config DYNAMIC_FTRACE config FUNCTION_PROFILER bool "Kernel function profiler" - depends on DYNAMIC_FTRACE + depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. When the dynamic - function tracing is enabled, a counter is added into the function - records used by the dynamic function tracer. A file is created in - debugfs called function_profile_enabled which defaults to zero. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. When a 1 is echoed into this file profiling begins, and when a zero is entered, profiling stops. A file in the trace_stats directory called functions, that show the list of functions that have been hit and their counters. - This takes up around 320K more memory. - If in doubt, say N config FTRACE_MCOUNT_RECORD diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 11f364c776d..24dac448cdc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void) #endif } -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; -static struct pid * const ftrace_swapper_pid = &init_struct_pid; - -#ifdef CONFIG_DYNAMIC_FTRACE - -#ifndef CONFIG_FTRACE_MCOUNT_RECORD -# error Dynamic ftrace depends on MCOUNT_RECORD -#endif - -static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; - -struct ftrace_func_probe { - struct hlist_node node; - struct ftrace_probe_ops *ops; - unsigned long flags; - unsigned long ip; - void *data; - struct rcu_head rcu; +#ifdef CONFIG_FUNCTION_PROFILER +struct ftrace_profile { + struct hlist_node node; + unsigned long ip; + unsigned long counter; }; -enum { - FTRACE_ENABLE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_ENABLE_MCOUNT = (1 << 3), - FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), +struct ftrace_profile_page { + struct ftrace_profile_page *next; + unsigned long index; + struct ftrace_profile records[]; }; -static int ftrace_filtered; +#define PROFILE_RECORDS_SIZE \ + (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) -static struct dyn_ftrace *ftrace_new_addrs; +#define PROFILES_PER_PAGE \ + (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static DEFINE_MUTEX(ftrace_regex_lock); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct dyn_ftrace records[]; -}; +/* TODO: make these percpu, to prevent cache line bouncing */ +static struct ftrace_profile_page *profile_pages_start; +static struct ftrace_profile_page *profile_pages; -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -static struct dyn_ftrace *ftrace_free_records; - -/* - * This is a double for. Do not use 'break' to break out of the loop, - * you must use a goto. - */ -#define do_for_each_ftrace_rec(pg, rec) \ - for (pg = ftrace_pages_start; pg; pg = pg->next) { \ - int _____i; \ - for (_____i = 0; _____i < pg->index; _____i++) { \ - rec = &pg->records[_____i]; - -#define while_for_each_ftrace_rec() \ - } \ - } - -#ifdef CONFIG_FUNCTION_PROFILER static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); +static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); + +#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +static raw_spinlock_t ftrace_profile_rec_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static void * function_stat_next(void *v, int idx) { - struct dyn_ftrace *rec = v; - struct ftrace_page *pg; + struct ftrace_profile *rec = v; + struct ftrace_profile_page *pg; - pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); again: rec++; @@ -330,27 +291,22 @@ function_stat_next(void *v, int idx) if (!pg) return NULL; rec = &pg->records[0]; + if (!rec->counter) + goto again; } - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || - !(rec->flags & FTRACE_FL_CONVERTED) || - /* ignore non hit functions */ - !rec->counter) - goto again; - return rec; } static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&ftrace_pages_start->records[0], 0); + return function_stat_next(&profile_pages_start->records[0], 0); } static int function_stat_cmp(void *p1, void *p2) { - struct dyn_ftrace *a = p1; - struct dyn_ftrace *b = p2; + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m) static int function_stat_show(struct seq_file *m, void *v) { - struct dyn_ftrace *rec = v; + struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -387,115 +343,191 @@ static struct tracer_stat function_stats = { .stat_show = function_stat_show }; -static void ftrace_profile_init(int nr_funcs) +static void ftrace_profile_reset(void) { - unsigned long addr; - int order; - int size; + struct ftrace_profile_page *pg; - /* - * We are profiling all functions, lets make it 1/4th of the - * number of functions that are in core kernel. So we have to - * iterate 4 times. - */ - order = (sizeof(struct hlist_head) * nr_funcs) / 4; - order = get_order(order); - size = 1 << (PAGE_SHIFT + order); - - pr_info("Allocating %d KB for profiler hash\n", size >> 10); + pg = profile_pages = profile_pages_start; - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!addr) { - pr_warning("Could not allocate function profiler hash\n"); - return; + while (pg) { + memset(pg->records, 0, PROFILE_RECORDS_SIZE); + pg->index = 0; + pg = pg->next; } - ftrace_profile_hash = (void *)addr; + memset(ftrace_profile_hash, 0, + FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); +} - /* - * struct hlist_head should be a pointer of 4 or 8 bytes. - * And a simple bit manipulation can be done, but if for - * some reason struct hlist_head is not a mulitple of 2, - * then we play it safe, and simply count. This function - * is done once at boot up, so it is not that critical in - * performance. - */ +int ftrace_profile_pages_init(void) +{ + struct ftrace_profile_page *pg; + int i; - size--; - size /= sizeof(struct hlist_head); + /* If we already allocated, do nothing */ + if (profile_pages) + return 0; - for (; size; size >>= 1) - ftrace_profile_bits++; + profile_pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!profile_pages) + return -ENOMEM; - pr_info("Function profiler has %d hash buckets\n", - 1 << ftrace_profile_bits); + pg = profile_pages_start = profile_pages; - return; + /* allocate 10 more pages to start */ + for (i = 0; i < 10; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + /* + * We only care about allocating profile_pages, if + * we failed to allocate here, hopefully we will allocate + * later. + */ + if (!pg->next) + break; + pg = pg->next; + } + + return 0; } -static ssize_t -ftrace_profile_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int ftrace_profile_init(void) { - char buf[64]; - int r; + int size; - r = sprintf(buf, "%u\n", ftrace_profile_enabled); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} + if (ftrace_profile_hash) { + /* If the profile is already created, simply reset it */ + ftrace_profile_reset(); + return 0; + } -static void ftrace_profile_reset(void) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; + /* + * We are profiling all functions, but usually only a few thousand + * functions are hit. We'll make a hash of 1024 items. + */ + size = FTRACE_PROFILE_HASH_SIZE; - do_for_each_ftrace_rec(pg, rec) { - rec->counter = 0; - } while_for_each_ftrace_rec(); + ftrace_profile_hash = + kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + + if (!ftrace_profile_hash) + return -ENOMEM; + + size--; + + for (; size; size >>= 1) + ftrace_profile_bits++; + + /* Preallocate a few pages */ + if (ftrace_profile_pages_init() < 0) { + kfree(ftrace_profile_hash); + ftrace_profile_hash = NULL; + return -ENOMEM; + } + + return 0; } -static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +/* interrupts must be disabled */ +static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; struct hlist_head *hhd; struct hlist_node *n; - unsigned long flags; unsigned long key; - if (!ftrace_profile_hash) - return NULL; - key = hash_long(ip, ftrace_profile_bits); hhd = &ftrace_profile_hash[key]; if (hlist_empty(hhd)) return NULL; - local_irq_save(flags); hlist_for_each_entry_rcu(rec, n, hhd, node) { if (rec->ip == ip) - goto out; + return rec; + } + + return NULL; +} + +static void ftrace_add_profile(struct ftrace_profile *rec) +{ + unsigned long key; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +/* Interrupts must be disabled calling this */ +static struct ftrace_profile * +ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +{ + struct ftrace_profile *rec = NULL; + + /* prevent recursion */ + if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + goto out; + + __raw_spin_lock(&ftrace_profile_rec_lock); + + /* Try to always keep another page available */ + if (!profile_pages->next && alloc_safe) + profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + + /* + * Try to find the function again since another + * task on another CPU could have added it + */ + rec = ftrace_find_profiled_func(ip); + if (rec) + goto out_unlock; + + if (profile_pages->index == PROFILES_PER_PAGE) { + if (!profile_pages->next) + goto out_unlock; + profile_pages = profile_pages->next; } - rec = NULL; + + rec = &profile_pages->records[profile_pages->index++]; + rec->ip = ip; + ftrace_add_profile(rec); + + out_unlock: + __raw_spin_unlock(&ftrace_profile_rec_lock); out: - local_irq_restore(flags); + atomic_dec(&__get_cpu_var(ftrace_profile_disable)); return rec; } +/* + * If we are not in an interrupt, or softirq and + * and interrupts are disabled and preemption is not enabled + * (not in a spinlock) then it should be safe to allocate memory. + */ +static bool ftrace_safe_to_allocate(void) +{ + return !in_interrupt() && irqs_disabled() && !preempt_count(); +} + static void function_profile_call(unsigned long ip, unsigned long parent_ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; unsigned long flags; + bool alloc_safe; if (!ftrace_profile_enabled) return; + alloc_safe = ftrace_safe_to_allocate(); + local_irq_save(flags); rec = ftrace_find_profiled_func(ip); - if (!rec) - goto out; + if (!rec) { + rec = ftrace_profile_alloc(ip, alloc_safe); + if (!rec) + goto out; + } rec->counter++; out: @@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, char buf[64]; int ret; - if (!ftrace_profile_hash) { - pr_info("Can not enable hash due to earlier problems\n"); - return -ENODEV; - } - if (cnt >= sizeof(buf)) return -EINVAL; @@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, mutex_lock(&ftrace_profile_lock); if (ftrace_profile_enabled ^ val) { if (val) { - ftrace_profile_reset(); + ret = ftrace_profile_init(); + if (ret < 0) { + cnt = ret; + goto out; + } + register_ftrace_function(&ftrace_profile_ops); ftrace_profile_enabled = 1; } else { @@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, unregister_ftrace_function(&ftrace_profile_ops); } } + out: mutex_unlock(&ftrace_profile_lock); filp->f_pos += cnt; @@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct file_operations ftrace_profile_fops = { .open = tracing_open_generic, .read = ftrace_profile_read, @@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) "'function_profile_enabled' entry\n"); } -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ - unsigned long key; - - if (!ftrace_profile_hash) - return; - - key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); -} - -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ - mutex_lock(&ftrace_profile_lock); - hlist_del(&rec->node); - mutex_unlock(&ftrace_profile_lock); -} - #else /* CONFIG_FUNCTION_PROFILER */ -static void ftrace_profile_init(int nr_funcs) -{ -} -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ -} static void ftrace_profile_debugfs(struct dentry *d_tracer) { } -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ -} #endif /* CONFIG_FUNCTION_PROFILER */ +/* set when tracing only a pid */ +struct pid *ftrace_pid_trace; +static struct pid * const ftrace_swapper_pid = &init_struct_pid; + +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifndef CONFIG_FTRACE_MCOUNT_RECORD +# error Dynamic ftrace depends on MCOUNT_RECORD +#endif + +static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +struct ftrace_func_probe { + struct hlist_node node; + struct ftrace_probe_ops *ops; + unsigned long flags; + unsigned long ip; + void *data; + struct rcu_head rcu; +}; + +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), +}; + +static int ftrace_filtered; + +static struct dyn_ftrace *ftrace_new_addrs; + +static DEFINE_MUTEX(ftrace_regex_lock); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct dyn_ftrace records[]; +}; + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +static struct dyn_ftrace *ftrace_free_records; + +/* + * This is a double for. Do not use 'break' to break out of the loop, + * you must use a goto. + */ +#define do_for_each_ftrace_rec(pg, rec) \ + for (pg = ftrace_pages_start; pg; pg = pg->next) { \ + int _____i; \ + for (_____i = 0; _____i < pg->index; _____i++) { \ + rec = &pg->records[_____i]; + +#define while_for_each_ftrace_rec() \ + } \ + } + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) { + !(rec->flags & FTRACE_FL_FREE)) ftrace_free_rec(rec); - ftrace_profile_release(rec); - } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; - ftrace_add_profile(rec); - return rec; } @@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - ftrace_profile_debugfs(d_tracer); - return 0; } @@ -2532,8 +2611,6 @@ void __init ftrace_init(void) if (ret) goto failed; - ftrace_profile_init(count); - last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, @@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_pid' entry\n"); + + ftrace_profile_debugfs(d_tracer); + return 0; } fs_initcall(ftrace_init_debugfs); -- cgit v1.2.3-70-g09d2 From 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 23:12:58 -0400 Subject: tracing: adding function timings to function profiler If the function graph trace is enabled, the function profiler will use it to take the timing of the functions. cat /debug/tracing/trace_stat/functions Function Hit Time -------- --- ---- mwait_idle 127 183028.4 us schedule 26 151997.7 us __schedule 31 151975.1 us sys_wait4 2 74080.53 us do_wait 2 74077.80 us sys_newlstat 138 39929.16 us do_path_lookup 179 39845.79 us vfs_lstat_fd 138 39761.97 us user_path_at 153 39469.58 us path_walk 179 39435.76 us __link_path_walk 189 39143.73 us [...] Note the times are skewed due to the function graph tracer not taking into account schedules. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 93 ++++++++++++++++++++++++++++++++++-- kernel/trace/trace.c | 11 ----- kernel/trace/trace.h | 3 +- kernel/trace/trace_functions_graph.c | 17 +++++-- kernel/trace/trace_output.c | 10 ++++ kernel/trace/trace_output.h | 2 + 6 files changed, 117 insertions(+), 19 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 24dac448cdc..a9ccd71fc92 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -33,7 +33,7 @@ #include -#include "trace.h" +#include "trace_output.h" #include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ @@ -246,6 +246,9 @@ struct ftrace_profile { struct hlist_node node; unsigned long ip; unsigned long counter; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned long long time; +#endif }; struct ftrace_profile_page { @@ -303,6 +306,22 @@ static void *function_stat_start(struct tracer_stat *trace) return function_stat_next(&profile_pages_start->records[0], 0); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* function graph compares on total time */ +static int function_stat_cmp(void *p1, void *p2) +{ + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; + + if (a->time < b->time) + return -1; + if (a->time > b->time) + return 1; + else + return 0; +} +#else +/* not function graph compares against hits */ static int function_stat_cmp(void *p1, void *p2) { struct ftrace_profile *a = p1; @@ -315,11 +334,17 @@ static int function_stat_cmp(void *p1, void *p2) else return 0; } +#endif static int function_stat_headers(struct seq_file *m) { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " Function Hit Time\n" + " -------- --- ----\n"); +#else seq_printf(m, " Function Hit\n" " -------- ---\n"); +#endif return 0; } @@ -327,10 +352,25 @@ static int function_stat_show(struct seq_file *m, void *v) { struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + static struct trace_seq s; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + trace_seq_init(&s); + trace_print_graph_duration(rec->time, &s); +#endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + seq_printf(m, " %-30.30s %10lu", str, rec->counter); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " "); + trace_print_seq(m, &s); + mutex_unlock(&mutex); +#endif + seq_putc(m, '\n'); - seq_printf(m, " %-30.30s %10lu\n", str, rec->counter); return 0; } @@ -534,11 +574,52 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int profile_graph_entry(struct ftrace_graph_ent *trace) +{ + function_profile_call(trace->func, 0); + return 1; +} + +static void profile_graph_return(struct ftrace_graph_ret *trace) +{ + unsigned long flags; + struct ftrace_profile *rec; + + local_irq_save(flags); + rec = ftrace_find_profiled_func(trace->func); + if (rec) + rec->time += trace->rettime - trace->calltime; + local_irq_restore(flags); +} + +static int register_ftrace_profiler(void) +{ + return register_ftrace_graph(&profile_graph_return, + &profile_graph_entry); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_graph(); +} +#else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, }; +static int register_ftrace_profiler(void) +{ + return register_ftrace_function(&ftrace_profile_ops); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_function(&ftrace_profile_ops); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static ssize_t ftrace_profile_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -570,11 +651,15 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, goto out; } - register_ftrace_function(&ftrace_profile_ops); + ret = register_ftrace_profiler(); + if (ret < 0) { + cnt = ret; + goto out; + } ftrace_profile_enabled = 1; } else { ftrace_profile_enabled = 0; - unregister_ftrace_function(&ftrace_profile_ops); + unregister_ftrace_profiler(); } } out: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 67c6a21dd42..821bf49771d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -402,17 +402,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) return cnt; } -static void -trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - - s->buffer[len] = 0; - seq_puts(m, s->buffer); - - trace_seq_init(s); -} - /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d7410bbb9a8..c66ca3b6605 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -605,6 +605,8 @@ extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); +extern enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -636,7 +638,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 1; } #endif /* CONFIG_DYNAMIC_FTRACE */ - #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e7b3a..85bba0f018b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -426,8 +426,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_HANDLED; } -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) +enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ @@ -464,12 +464,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) if (!ret) return TRACE_TYPE_PARTIAL_LINE; } + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s) +{ + int ret; + + ret = trace_print_graph_duration(duration, s); + if (ret != TRACE_TYPE_HANDLED) + return ret; ret = trace_seq_printf(s, "| "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + return TRACE_TYPE_HANDLED; } /* Case of a leaf function on its call entry */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 19261fdd245..a3b6e3fd704 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -19,6 +19,16 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; +void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + + s->buffer[len] = 0; + seq_puts(m, s->buffer); + + trace_seq_init(s); +} + enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 35c422fb51a..1eac2973374 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -20,6 +20,8 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); + extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); extern int -- cgit v1.2.3-70-g09d2 From cafb168a1c92e4c9e1731fe3d666c39611762c49 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 20:50:39 -0400 Subject: tracing: make the function profiler per cpu Impact: speed enhancement By making the function profiler record in per cpu data we not only get better readings, avoid races, we also do not have to take any locks. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 199 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 130 insertions(+), 69 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a9ccd71fc92..ed1fc5021d4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -257,28 +257,28 @@ struct ftrace_profile_page { struct ftrace_profile records[]; }; +struct ftrace_profile_stat { + atomic_t disabled; + struct hlist_head *hash; + struct ftrace_profile_page *pages; + struct ftrace_profile_page *start; + struct tracer_stat stat; +}; + #define PROFILE_RECORDS_SIZE \ (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) #define PROFILES_PER_PAGE \ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -/* TODO: make these percpu, to prevent cache line bouncing */ -static struct ftrace_profile_page *profile_pages_start; -static struct ftrace_profile_page *profile_pages; - -static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); -static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); +static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ -static raw_spinlock_t ftrace_profile_rec_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - static void * function_stat_next(void *v, int idx) { @@ -303,7 +303,13 @@ function_stat_next(void *v, int idx) static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&profile_pages_start->records[0], 0); + struct ftrace_profile_stat *stat = + container_of(trace, struct ftrace_profile_stat, stat); + + if (!stat || !stat->start) + return NULL; + + return function_stat_next(&stat->start->records[0], 0); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v) return 0; } -static struct tracer_stat function_stats = { - .name = "functions", - .stat_start = function_stat_start, - .stat_next = function_stat_next, - .stat_cmp = function_stat_cmp, - .stat_headers = function_stat_headers, - .stat_show = function_stat_show -}; - -static void ftrace_profile_reset(void) +static void ftrace_profile_reset(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; - pg = profile_pages = profile_pages_start; + pg = stat->pages = stat->start; while (pg) { memset(pg->records, 0, PROFILE_RECORDS_SIZE); @@ -395,24 +392,24 @@ static void ftrace_profile_reset(void) pg = pg->next; } - memset(ftrace_profile_hash, 0, + memset(stat->hash, 0, FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); } -int ftrace_profile_pages_init(void) +int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; int i; /* If we already allocated, do nothing */ - if (profile_pages) + if (stat->pages) return 0; - profile_pages = (void *)get_zeroed_page(GFP_KERNEL); - if (!profile_pages) + stat->pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!stat->pages) return -ENOMEM; - pg = profile_pages_start = profile_pages; + pg = stat->start = stat->pages; /* allocate 10 more pages to start */ for (i = 0; i < 10; i++) { @@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void) return 0; } -static int ftrace_profile_init(void) +static int ftrace_profile_init_cpu(int cpu) { + struct ftrace_profile_stat *stat; int size; - if (ftrace_profile_hash) { + stat = &per_cpu(ftrace_profile_stats, cpu); + + if (stat->hash) { /* If the profile is already created, simply reset it */ - ftrace_profile_reset(); + ftrace_profile_reset(stat); return 0; } @@ -446,29 +446,45 @@ static int ftrace_profile_init(void) */ size = FTRACE_PROFILE_HASH_SIZE; - ftrace_profile_hash = - kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); - if (!ftrace_profile_hash) + if (!stat->hash) return -ENOMEM; - size--; + if (!ftrace_profile_bits) { + size--; - for (; size; size >>= 1) - ftrace_profile_bits++; + for (; size; size >>= 1) + ftrace_profile_bits++; + } /* Preallocate a few pages */ - if (ftrace_profile_pages_init() < 0) { - kfree(ftrace_profile_hash); - ftrace_profile_hash = NULL; + if (ftrace_profile_pages_init(stat) < 0) { + kfree(stat->hash); + stat->hash = NULL; return -ENOMEM; } return 0; } +static int ftrace_profile_init(void) +{ + int cpu; + int ret = 0; + + for_each_online_cpu(cpu) { + ret = ftrace_profile_init_cpu(cpu); + if (ret) + break; + } + + return ret; +} + /* interrupts must be disabled */ -static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) +static struct ftrace_profile * +ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec; struct hlist_head *hhd; @@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) unsigned long key; key = hash_long(ip, ftrace_profile_bits); - hhd = &ftrace_profile_hash[key]; + hhd = &stat->hash[key]; if (hlist_empty(hhd)) return NULL; @@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) return NULL; } -static void ftrace_add_profile(struct ftrace_profile *rec) +static void ftrace_add_profile(struct ftrace_profile_stat *stat, + struct ftrace_profile *rec) { unsigned long key; key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); + hlist_add_head_rcu(&rec->node, &stat->hash[key]); } /* Interrupts must be disabled calling this */ static struct ftrace_profile * -ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +ftrace_profile_alloc(struct ftrace_profile_stat *stat, + unsigned long ip, bool alloc_safe) { struct ftrace_profile *rec = NULL; /* prevent recursion */ - if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + if (atomic_inc_return(&stat->disabled) != 1) goto out; - __raw_spin_lock(&ftrace_profile_rec_lock); - /* Try to always keep another page available */ - if (!profile_pages->next && alloc_safe) - profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + if (!stat->pages->next && alloc_safe) + stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC); /* * Try to find the function again since another * task on another CPU could have added it */ - rec = ftrace_find_profiled_func(ip); + rec = ftrace_find_profiled_func(stat, ip); if (rec) - goto out_unlock; + goto out; - if (profile_pages->index == PROFILES_PER_PAGE) { - if (!profile_pages->next) - goto out_unlock; - profile_pages = profile_pages->next; + if (stat->pages->index == PROFILES_PER_PAGE) { + if (!stat->pages->next) + goto out; + stat->pages = stat->pages->next; } - rec = &profile_pages->records[profile_pages->index++]; + rec = &stat->pages->records[stat->pages->index++]; rec->ip = ip; - ftrace_add_profile(rec); + ftrace_add_profile(stat, rec); - out_unlock: - __raw_spin_unlock(&ftrace_profile_rec_lock); out: - atomic_dec(&__get_cpu_var(ftrace_profile_disable)); + atomic_dec(&stat->disabled); return rec; } @@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void) static void function_profile_call(unsigned long ip, unsigned long parent_ip) { + struct ftrace_profile_stat *stat; struct ftrace_profile *rec; unsigned long flags; bool alloc_safe; @@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) alloc_safe = ftrace_safe_to_allocate(); local_irq_save(flags); - rec = ftrace_find_profiled_func(ip); + + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash) + goto out; + + rec = ftrace_find_profiled_func(stat, ip); if (!rec) { - rec = ftrace_profile_alloc(ip, alloc_safe); + rec = ftrace_profile_alloc(stat, ip, alloc_safe); if (!rec) goto out; } @@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { - unsigned long flags; + struct ftrace_profile_stat *stat; struct ftrace_profile *rec; + unsigned long flags; local_irq_save(flags); - rec = ftrace_find_profiled_func(trace->func); + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash) + goto out; + + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) rec->time += trace->rettime - trace->calltime; + out: local_irq_restore(flags); } @@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = { .write = ftrace_profile_write, }; +/* used to initialize the real stat files */ +static struct tracer_stat function_stats __initdata = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + static void ftrace_profile_debugfs(struct dentry *d_tracer) { + struct ftrace_profile_stat *stat; struct dentry *entry; + char *name; int ret; + int cpu; - ret = register_stat_tracer(&function_stats); - if (ret) { - pr_warning("Warning: could not register " - "function stats\n"); - return; + for_each_possible_cpu(cpu) { + stat = &per_cpu(ftrace_profile_stats, cpu); + + /* allocate enough for function name + cpu number */ + name = kmalloc(32, GFP_KERNEL); + if (!name) { + /* + * The files created are permanent, if something happens + * we still do not free memory. + */ + kfree(stat); + WARN(1, + "Could not allocate stat file for cpu %d\n", + cpu); + return; + } + stat->stat = function_stats; + snprintf(name, 32, "function%d", cpu); + stat->stat.name = name; + ret = register_stat_tracer(&stat->stat); + if (ret) { + WARN(1, + "Could not register function stat for cpu %d\n", + cpu); + kfree(name); + return; + } } entry = debugfs_create_file("function_profile_enabled", 0644, -- cgit v1.2.3-70-g09d2 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 21 ++++++++++++++++++++- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 8 ++++---- 5 files changed, 29 insertions(+), 8 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf7..9e0a8d245e5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ed1fc5021d4..71e5faef12a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { struct ftrace_profile_stat *stat; + unsigned long long calltime; struct ftrace_profile *rec; unsigned long flags; @@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash) goto out; + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) - rec->time += trace->rettime - trace->calltime; + rec->time += calltime; + out: local_irq_restore(flags); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 821bf49771d..5d1a16cae37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +318,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c66ca3b6605..e3429a8ab05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 85bba0f018b..10f6ad7d85f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); -- cgit v1.2.3-70-g09d2 From fb9fb015e92123fa3a8e0c2e2fff491d4a56b470 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 13:26:41 -0400 Subject: tracing: clean up tracing profiler Ingo Molnar suggested clean ups for the profiling code. This patch makes those updates. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 71e5faef12a..a141d8499ab 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -69,7 +69,7 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, + .func = ftrace_stub, }; static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; @@ -271,8 +271,10 @@ struct ftrace_profile_stat { #define PROFILES_PER_PAGE \ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static int ftrace_profile_bits; -static int ftrace_profile_enabled; +static int ftrace_profile_bits __read_mostly; +static int ftrace_profile_enabled __read_mostly; + +/* ftrace_profile_lock - synchronize the enable and disable of the profiler */ static DEFINE_MUTEX(ftrace_profile_lock); static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); @@ -651,7 +653,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { - .func = function_profile_call, + .func = function_profile_call, }; static int register_ftrace_profiler(void) @@ -670,7 +672,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long val; - char buf[64]; + char buf[64]; /* big enough to hold a number */ int ret; if (cnt >= sizeof(buf)) @@ -719,7 +721,7 @@ static ssize_t ftrace_profile_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[64]; + char buf[64]; /* big enough to hold a number */ int r; r = sprintf(buf, "%u\n", ftrace_profile_enabled); @@ -734,12 +736,12 @@ static const struct file_operations ftrace_profile_fops = { /* used to initialize the real stat files */ static struct tracer_stat function_stats __initdata = { - .name = "functions", - .stat_start = function_stat_start, - .stat_next = function_stat_next, - .stat_cmp = function_stat_cmp, - .stat_headers = function_stat_headers, - .stat_show = function_stat_show + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show }; static void ftrace_profile_debugfs(struct dentry *d_tracer) @@ -1954,7 +1956,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_probe_ops __read_mostly = { - .func = function_trace_probe_call, + .func = function_trace_probe_call, }; static int ftrace_probe_registered; -- cgit v1.2.3-70-g09d2 From 318e0a73c9e41b9a17241829bcd0605a39b87cb9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 20:06:34 -0400 Subject: tracing: remove on the fly allocator from function profiler Impact: safer code The on the fly allocator for the function profiler was to save memory. But at the expense of stability. Although it survived several tests, allocating from the function tracer is just too risky, just to save space. This patch removes the allocator and simply allocates enough entries at start up. Each function gets a profiling structure of 40 bytes. With an average of 20K functions, and this is for each CPU, we have 800K per online CPU. This is not too bad, at least for non-embedded. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 76 +++++++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 33 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a141d8499ab..4d90c916b2b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -401,6 +401,8 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat) int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; + int functions; + int pages; int i; /* If we already allocated, do nothing */ @@ -411,22 +413,46 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) if (!stat->pages) return -ENOMEM; +#ifdef CONFIG_DYNAMIC_FTRACE + functions = ftrace_update_tot_cnt; +#else + /* + * We do not know the number of functions that exist because + * dynamic tracing is what counts them. With past experience + * we have around 20K functions. That should be more than enough. + * It is highly unlikely we will execute every function in + * the kernel. + */ + functions = 20000; +#endif + pg = stat->start = stat->pages; - /* allocate 10 more pages to start */ - for (i = 0; i < 10; i++) { + pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); + + for (i = 0; i < pages; i++) { pg->next = (void *)get_zeroed_page(GFP_KERNEL); - /* - * We only care about allocating profile_pages, if - * we failed to allocate here, hopefully we will allocate - * later. - */ if (!pg->next) - break; + goto out_free; pg = pg->next; } return 0; + + out_free: + pg = stat->start; + while (pg) { + unsigned long tmp = (unsigned long)pg; + + pg = pg->next; + free_page(tmp); + } + + free_page((unsigned long)stat->pages); + stat->pages = NULL; + stat->start = NULL; + + return -ENOMEM; } static int ftrace_profile_init_cpu(int cpu) @@ -460,7 +486,7 @@ static int ftrace_profile_init_cpu(int cpu) ftrace_profile_bits++; } - /* Preallocate a few pages */ + /* Preallocate the function profiling pages */ if (ftrace_profile_pages_init(stat) < 0) { kfree(stat->hash); stat->hash = NULL; @@ -516,24 +542,21 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat, hlist_add_head_rcu(&rec->node, &stat->hash[key]); } -/* Interrupts must be disabled calling this */ +/* + * The memory is already allocated, this simply finds a new record to use. + */ static struct ftrace_profile * -ftrace_profile_alloc(struct ftrace_profile_stat *stat, - unsigned long ip, bool alloc_safe) +ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec = NULL; - /* prevent recursion */ + /* prevent recursion (from NMIs) */ if (atomic_inc_return(&stat->disabled) != 1) goto out; - /* Try to always keep another page available */ - if (!stat->pages->next && alloc_safe) - stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC); - /* - * Try to find the function again since another - * task on another CPU could have added it + * Try to find the function again since an NMI + * could have added it */ rec = ftrace_find_profiled_func(stat, ip); if (rec) @@ -555,29 +578,16 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, return rec; } -/* - * If we are not in an interrupt, or softirq and - * and interrupts are disabled and preemption is not enabled - * (not in a spinlock) then it should be safe to allocate memory. - */ -static bool ftrace_safe_to_allocate(void) -{ - return !in_interrupt() && irqs_disabled() && !preempt_count(); -} - static void function_profile_call(unsigned long ip, unsigned long parent_ip) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; unsigned long flags; - bool alloc_safe; if (!ftrace_profile_enabled) return; - alloc_safe = ftrace_safe_to_allocate(); - local_irq_save(flags); stat = &__get_cpu_var(ftrace_profile_stats); @@ -586,7 +596,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) rec = ftrace_find_profiled_func(stat, ip); if (!rec) { - rec = ftrace_profile_alloc(stat, ip, alloc_safe); + rec = ftrace_profile_alloc(stat, ip); if (!rec) goto out; } -- cgit v1.2.3-70-g09d2 From 34886c8bc590f078d4c0b88f50d061326639198d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 21:00:47 -0400 Subject: tracing: add average time in function to function profiler Show the average time in the function (Time / Hit) Function Hit Time Avg -------- --- ---- --- mwait_idle 51 140326.6 us 2751.503 us smp_apic_timer_interrupt 47 3517.735 us 74.845 us schedule 10 2738.754 us 273.875 us __schedule 10 2732.857 us 273.285 us hrtimer_interrupt 47 1896.104 us 40.342 us irq_exit 56 1711.833 us 30.568 us __run_hrtimer 47 1315.589 us 27.991 us tick_sched_timer 47 1138.690 us 24.227 us do_softirq 56 1116.829 us 19.943 us __do_softirq 56 1066.932 us 19.052 us do_IRQ 9 926.153 us 102.905 us Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4d90c916b2b..c7f4a4be05d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -347,8 +347,10 @@ static int function_stat_cmp(void *p1, void *p2) static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER - seq_printf(m, " Function Hit Time\n" - " -------- --- ----\n"); + seq_printf(m, " Function " + "Hit Time Avg\n" + " -------- " + "--- ---- ---\n"); #else seq_printf(m, " Function Hit\n" " -------- ---\n"); @@ -361,12 +363,9 @@ static int function_stat_show(struct seq_file *m, void *v) struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - static struct trace_seq s; static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); - trace_seq_init(&s); - trace_print_graph_duration(rec->time, &s); + static struct trace_seq s; + unsigned long long avg; #endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -374,6 +373,14 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " "); + avg = rec->time; + do_div(avg, rec->counter); + + mutex_lock(&mutex); + trace_seq_init(&s); + trace_print_graph_duration(rec->time, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(avg, &s); trace_print_seq(m, &s); mutex_unlock(&mutex); #endif -- cgit v1.2.3-70-g09d2 From 5452af664f6fba26b80eb2c8c4ceae2999d5cf56 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Mar 2009 00:25:38 +0100 Subject: tracing/ftrace: factorize the tracing files creation Impact: cleanup Most of the tracing files creation follow the same pattern: ret = debugfs_create_file(...) if (!ret) pr_warning("Couldn't create ... entry\n") Unify it! Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1238109938-11840-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 39 +++------ kernel/trace/ring_buffer.c | 7 +- kernel/trace/trace.c | 159 +++++++++++++++---------------------- kernel/trace/trace.h | 6 ++ kernel/trace/trace_event_profile.c | 1 - kernel/trace/trace_printk.c | 6 +- kernel/trace/trace_stack.c | 13 +-- kernel/trace/trace_sysprof.c | 6 +- 8 files changed, 86 insertions(+), 151 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 678e3d6caf8..6ea5a1ae6a9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2698,38 +2698,23 @@ static const struct file_operations ftrace_graph_fops = { static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("available_filter_functions", 0444, - d_tracer, NULL, &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_filter_functions' entry\n"); + trace_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); - entry = debugfs_create_file("failures", 0444, - d_tracer, NULL, &ftrace_failures_fops); - if (!entry) - pr_warning("Could not create debugfs 'failures' entry\n"); + trace_create_file("failures", 0444, + d_tracer, NULL, &ftrace_failures_fops); - entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_filter' entry\n"); + trace_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); - entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + trace_create_file("set_ftrace_notrace", 0644, d_tracer, NULL, &ftrace_notrace_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_notrace' entry\n"); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + trace_create_file("set_graph_function", 0444, d_tracer, NULL, &ftrace_graph_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ return 0; @@ -2987,7 +2972,6 @@ static const struct file_operations ftrace_pid_fops = { static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -2995,11 +2979,8 @@ static __init int ftrace_init_debugfs(void) ftrace_init_dyn_debugfs(d_tracer); - entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, - NULL, &ftrace_pid_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_pid' entry\n"); + trace_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); ftrace_profile_debugfs(d_tracer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 960cbf44c84..74a11808c28 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2845,14 +2845,11 @@ static const struct file_operations rb_simple_fops = { static __init int rb_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_on' entry\n"); + trace_create_file("tracing_on", 0644, d_tracer, + &ring_buffer_flags, &rb_simple_fops); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 32653c8c6e2..0615751a3ed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3581,7 +3581,7 @@ struct dentry *tracing_dentry_percpu(void) static void tracing_init_debugfs_percpu(long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(); - struct dentry *entry, *d_cpu; + struct dentry *d_cpu; /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ char cpu_dir[7]; @@ -3596,21 +3596,15 @@ static void tracing_init_debugfs_percpu(long cpu) } /* per cpu trace_pipe */ - entry = debugfs_create_file("trace_pipe", 0444, d_cpu, - (void *) cpu, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe' entry\n"); + trace_create_file("trace_pipe", 0444, d_cpu, + (void *) cpu, &tracing_pipe_fops); /* per cpu trace */ - entry = debugfs_create_file("trace", 0644, d_cpu, - (void *) cpu, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); + trace_create_file("trace", 0644, d_cpu, + (void *) cpu, &tracing_fops); - entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, - (void *) cpu, &tracing_buffers_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); + trace_create_file("trace_pipe_raw", 0444, d_cpu, + (void *) cpu, &tracing_buffers_fops); } #ifdef CONFIG_FTRACE_SELFTEST @@ -3766,6 +3760,22 @@ static const struct file_operations trace_options_core_fops = { .write = trace_options_core_write, }; +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + struct dentry *ret; + + ret = debugfs_create_file(name, mode, parent, data, fops); + if (!ret) + pr_warning("Could not create debugfs '%s' entry\n", name); + + return ret; +} + + static struct dentry *trace_options_init_dentry(void) { struct dentry *d_tracer; @@ -3793,7 +3803,6 @@ create_trace_option_file(struct trace_option_dentry *topt, struct tracer_opt *opt) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) @@ -3802,11 +3811,9 @@ create_trace_option_file(struct trace_option_dentry *topt, topt->flags = flags; topt->opt = opt; - entry = debugfs_create_file(opt->name, 0644, t_options, topt, + topt->entry = trace_create_file(opt->name, 0644, t_options, topt, &trace_options_fops); - topt->entry = entry; - } static struct trace_option_dentry * @@ -3861,123 +3868,81 @@ static struct dentry * create_trace_option_core_file(const char *option, long index) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) return NULL; - entry = debugfs_create_file(option, 0644, t_options, (void *)index, + return trace_create_file(option, 0644, t_options, (void *)index, &trace_options_core_fops); - - return entry; } static __init void create_trace_options_dir(void) { struct dentry *t_options; - struct dentry *entry; int i; t_options = trace_options_init_dentry(); if (!t_options) return; - for (i = 0; trace_options[i]; i++) { - entry = create_trace_option_core_file(trace_options[i], i); - if (!entry) - pr_warning("Could not create debugfs %s entry\n", - trace_options[i]); - } + for (i = 0; trace_options[i]; i++) + create_trace_option_core_file(trace_options[i], i); } static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; int cpu; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, - &global_trace, &tracing_ctrl_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + trace_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); - entry = debugfs_create_file("trace_options", 0644, d_tracer, - NULL, &tracing_iter_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_options' entry\n"); + trace_create_file("trace_options", 0644, d_tracer, + NULL, &tracing_iter_fops); - create_trace_options_dir(); + trace_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + + trace_create_file("trace", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); + + trace_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + + trace_create_file("current_tracer", 0444, d_tracer, + &global_trace, &set_tracer_fops); + + trace_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, &tracing_max_lat_fops); + + trace_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); - entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, - NULL, &tracing_cpumask_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - - entry = debugfs_create_file("trace", 0644, d_tracer, - (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); - - entry = debugfs_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - if (!entry) - pr_warning("Could not create debugfs 'available_tracers' entry\n"); - - entry = debugfs_create_file("current_tracer", 0444, d_tracer, - &global_trace, &set_tracer_fops); - if (!entry) - pr_warning("Could not create debugfs 'current_tracer' entry\n"); - - entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, - &tracing_max_latency, - &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_max_latency' entry\n"); - - entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, - &tracing_thresh, &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_thresh' entry\n"); - entry = debugfs_create_file("README", 0644, d_tracer, - NULL, &tracing_readme_fops); - if (!entry) - pr_warning("Could not create debugfs 'README' entry\n"); - - entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("README", 0644, d_tracer, + NULL, &tracing_readme_fops); + + trace_create_file("trace_pipe", 0444, d_tracer, (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_pipe' entry\n"); - - entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, - &global_trace, &tracing_entries_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'buffer_size_kb' entry\n"); - - entry = debugfs_create_file("trace_marker", 0220, d_tracer, - NULL, &tracing_mark_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_marker' entry\n"); + + trace_create_file("buffer_size_kb", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + + trace_create_file("trace_marker", 0220, d_tracer, + NULL, &tracing_mark_fops); #ifdef CONFIG_DYNAMIC_FTRACE - entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, - &ftrace_update_tot_cnt, - &tracing_dyn_info_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'dyn_ftrace_total_info' entry\n"); + trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + create_trace_options_dir(); + for_each_tracing_cpu(cpu) tracing_init_debugfs_percpu(cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 47aa6d0c97a..f76a8f8689d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -470,6 +470,12 @@ void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops); + struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 22cba997077..199de9c7422 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -28,4 +28,3 @@ void ftrace_profile_disable(int event_id) return event->profile_disable(event); } } - diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index eb81556107f..9bece9687b6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = { static __init int init_trace_printk_function_export(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) return 0; - entry = debugfs_create_file("printk_formats", 0444, d_tracer, + trace_create_file("printk_formats", 0444, d_tracer, NULL, &ftrace_formats_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'printk_formats' entry\n"); return 0; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c750f65f966..1796f00524e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace); static __init int stack_trace_init(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_max_size' entry\n"); + trace_create_file("stack_max_size", 0644, d_tracer, + &max_stack_size, &stack_max_size_fops); - entry = debugfs_create_file("stack_trace", 0444, d_tracer, - NULL, &stack_trace_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_trace' entry\n"); + trace_create_file("stack_trace", 0444, d_tracer, + NULL, &stack_trace_fops); if (stack_tracer_enabled) register_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 91fd19c2149..e04b76cc238 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = { void init_tracer_sysprof_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("sysprof_sample_period", 0644, + trace_create_file("sysprof_sample_period", 0644, d_tracer, NULL, &sysprof_sample_fops); - if (entry) - return; - pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); } -- cgit v1.2.3-70-g09d2 From 597af81537654097b67fd7a0c92775e66d4a86fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 3 Apr 2009 15:24:12 -0400 Subject: function-graph: use int instead of atomic for ftrace_graph_active Impact: cleanup The variable ftrace_graph_active is only modified under the ftrace_lock mutex, thus an atomic is not necessary for modification. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6ea5a1ae6a9..8e6a0b5c994 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3092,7 +3092,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_graph_active; +static int ftrace_graph_active; static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) @@ -3244,7 +3244,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_lock); /* we currently allow only one tracer registered at a time */ - if (atomic_read(&ftrace_graph_active)) { + if (ftrace_graph_active) { ret = -EBUSY; goto out; } @@ -3252,10 +3252,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; register_pm_notifier(&ftrace_suspend_notifier); - atomic_inc(&ftrace_graph_active); + ftrace_graph_active++; ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; goto out; } @@ -3273,10 +3273,10 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_lock); - if (!unlikely(atomic_read(&ftrace_graph_active))) + if (unlikely(!ftrace_graph_active)) goto out; - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; @@ -3290,7 +3290,7 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_graph_active)) { + if (ftrace_graph_active) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 19:39:12 -0400 Subject: tracing/events: move trace point headers into include/trace/events Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/linux/kmemtrace.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/irq.h | 57 +++++++ include/trace/events/kmem.h | 194 ++++++++++++++++++++++ include/trace/events/lockdep.h | 60 +++++++ include/trace/events/sched.h | 339 ++++++++++++++++++++++++++++++++++++++ include/trace/events/skb.h | 40 +++++ include/trace/irq.h | 57 ------- include/trace/kmem.h | 194 ---------------------- include/trace/lockdep.h | 60 ------- include/trace/sched.h | 339 -------------------------------------- include/trace/skb.h | 40 ----- kernel/exit.c | 2 +- kernel/fork.c | 3 +- kernel/irq/handle.c | 2 +- kernel/kthread.c | 2 +- kernel/lockdep.c | 2 +- kernel/sched.c | 2 +- kernel/signal.c | 2 +- kernel/softirq.c | 2 +- kernel/trace/ftrace.c | 2 +- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- mm/util.c | 2 +- net/core/drop_monitor.c | 2 +- net/core/net-traces.c | 2 +- net/core/skbuff.c | 2 +- 27 files changed, 708 insertions(+), 707 deletions(-) create mode 100644 include/trace/events/irq.h create mode 100644 include/trace/events/kmem.h create mode 100644 include/trace/events/lockdep.h create mode 100644 include/trace/events/sched.h create mode 100644 include/trace/events/skb.h delete mode 100644 include/trace/irq.h delete mode 100644 include/trace/kmem.h delete mode 100644 include/trace/lockdep.h delete mode 100644 include/trace/sched.h delete mode 100644 include/trace/skb.h (limited to 'kernel/trace/ftrace.c') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 15c45a27a92..b616d3930c3 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -9,7 +9,7 @@ #ifdef __KERNEL__ -#include +#include #ifdef CONFIG_KMEMTRACE extern void kmemtrace_init(void); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 980eb66a6e3..18869417109 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -43,7 +43,7 @@ #endif #ifndef TRACE_INCLUDE_PATH -# define __TRACE_INCLUDE(system) +# define __TRACE_INCLUDE(system) # define UNDEF_TRACE_INCLUDE_FILE #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 00000000000..75e3468e449 --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 00000000000..c22c42f980b --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,194 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 00000000000..45e326b5c7f --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,60 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h new file mode 100644 index 00000000000..ffa1cab586b --- /dev/null +++ b/include/trace/events/sched.h @@ -0,0 +1,339 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 00000000000..1e8fabb57c0 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index 75e3468e449..00000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,57 +0,0 @@ -#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_IRQ_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#endif /* _TRACE_IRQ_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h deleted file mode 100644 index c22c42f980b..00000000000 --- a/include/trace/kmem.h +++ /dev/null @@ -1,194 +0,0 @@ -#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KMEM_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); -#endif /* _TRACE_KMEM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 45e326b5c7f..00000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,60 +0,0 @@ -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index ffa1cab586b..00000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,339 +0,0 @@ -#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SCHED_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#endif /* _TRACE_SCHED_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index 1e8fabb57c0..00000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,40 +0,0 @@ -#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SKB_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#endif /* _TRACE_SKB_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/exit.c b/kernel/exit.c index 2fe9d2c7eee..cab535c427b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 4bebf263923..085f73ebcea 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -71,6 +70,8 @@ #include #include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 983d8be8dff..37c63633e78 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -20,7 +20,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include #include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index e1c76924545..41c88fe4050 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #define KTHREAD_NICE_LEVEL (-5) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 257f21a76c5..47b201ecc6d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -48,7 +48,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/sched.c b/kernel/sched.c index e6d4518d47e..9f7ffd00b6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,7 +79,7 @@ #include "sched_cpupri.h" #define CREATE_TRACE_POINTS -#include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] diff --git a/kernel/signal.c b/kernel/signal.c index 1d5703ff003..94ec0a4dde0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/softirq.c b/kernel/softirq.c index a2d9b458ac2..7ab9dfd8d08 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e6a0b5c994..a2348898858 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9d8cccdfaa0..a98106dd979 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5bc00e8f153..b8b13c5540f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/mm/util.c b/mm/util.c index 0e74a22791c..6794a336e9a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,7 +7,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include /** * kstrdup - allocate space for and copy an existing string diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9fd0dc3cca9..b75b6cea49d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 80177205947..499a67eaf3a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -24,6 +24,6 @@ #include #define CREATE_TRACE_POINTS -#include +#include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f7..12806b84445 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include "kmap_skb.h" -- cgit v1.2.3-70-g09d2 From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 13:24:06 -0400 Subject: ftrace: use module notifier for function tracer The hooks in the module code for the function tracer must be called before any of that module code runs. The function tracer hooks modify the module (replacing calls to mcount to nops). If the code is executed while the change occurs, then the CPU can take a GPF. To handle the above with a bit of paranoia, I originally implemented the hooks as calls directly from the module code. After examining the notifier calls, it looks as though the start up notify is called before any of the module's code is executed. This makes the use of the notify safe with ftrace. Only the startup notify is required to be "safe". The shutdown simply removes the entries from the ftrace function list, and does not modify any code. This change has another benefit. It removes a issue with a reverse dependency in the mutexes of ftrace_lock and module_mutex. [ Impact: fix lock dependency bug, cleanup ] Cc: Rusty Russell Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 7 ---- include/linux/module.h | 4 +++ kernel/module.c | 19 ++++------- kernel/trace/ftrace.c | 90 +++++++++++++++++++++++++++++++++++--------------- 4 files changed, 75 insertions(+), 45 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 53869bef610..97c83e1bc58 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* diff --git a/include/linux/module.h b/include/linux/module.h index 6155fa44168..a8f2c0aa4c3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -341,6 +341,10 @@ struct module struct ftrace_event_call *trace_events; unsigned int num_trace_events; #endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/kernel/module.c b/kernel/module.c index a0394706f10..2383e60fcf3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1490,9 +1490,6 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); - /* release any pointers to mcount in this module */ - ftrace_release(mod->module_core, mod->core_size); - /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); @@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int num_mcount; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - unsigned long *mseg; mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->trace_events), &mod->num_trace_events); #endif - +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) @@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod, dynamic_debug_setup(debug, num_debug); } - /* sechdrs[0].sh_size is always zero */ - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", - sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); - err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod, cleanup: kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); - ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a2348898858..5b606f45b6c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -void ftrace_release(void *start, unsigned long size) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = s + size; - - if (ftrace_disabled || !start) - return; - - mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { - /* - * rec->ip is changed in ftrace_free_rec() - * It should not between s and e if record was freed. - */ - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); - ftrace_free_rec(rec); - } - } while_for_each_ftrace_rec(); - mutex_unlock(&ftrace_lock); -} - static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod, return 0; } -void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +#ifdef CONFIG_MODULES +void ftrace_release(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long s = (unsigned long)start; + unsigned long e = (unsigned long)end; + + if (ftrace_disabled || !start || start == end) + return; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + if ((rec->ip >= s) && (rec->ip < e)) { + /* + * rec->ip is changed in ftrace_free_rec() + * It should not between s and e if record was freed. + */ + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); + ftrace_free_rec(rec); + } + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; ftrace_convert_nops(mod, start, end); } +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + case MODULE_STATE_GOING: + ftrace_release(mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + } + + return 0; +} +#else +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +struct notifier_block ftrace_module_nb = { + .notifier_call = ftrace_module_notify, + .priority = 0, +}; + extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; @@ -2791,6 +2825,10 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); + ret = register_module_notifier(&ftrace_module_nb); + if (!ret) + pr_warning("Failed to register trace ftrace module notifier\n"); + return; failed: ftrace_disabled = 1; -- cgit v1.2.3-70-g09d2 From 24ed0c4bfc7d2d7507bb9d50f7f3bbdcd85d76dd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 17 May 2009 15:31:38 +0800 Subject: tracing: fix check for return value of register_module_notifier return zero should be correct, so fix it. [ Impact: eliminate incorrect syslog message ] Signed-off-by: Ming Lei Acked-by: Frederic Weisbecker Acked-by: Li Zefan Cc: rostedt@goodmis.org LKML-Reference: <1242545498-7285-1-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b606f45b6c..140699a9a8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2826,7 +2826,7 @@ void __init ftrace_init(void) __stop_mcount_loc); ret = register_module_notifier(&ftrace_module_nb); - if (!ret) + if (ret) pr_warning("Failed to register trace ftrace module notifier\n"); return; -- cgit v1.2.3-70-g09d2 From 2af15d6a44b871ad4c2a651302374cde8f335480 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 28 May 2009 13:37:24 -0400 Subject: ftrace: add kernel command line function filtering When using ftrace=function on the command line to trace functions on boot up, one can not filter out functions that are commonly called. This patch adds two new ftrace command line commands. ftrace_notrace=function-list ftrace_filter=function-list Where function-list is a comma separated list of functions to filter. The ftrace_notrace will make the functions listed not be included in the function tracing, and ftrace_filter will only trace the functions listed. These two act the same as the debugfs/tracing/set_ftrace_notrace and debugfs/tracing/set_ftrace_filter respectively. The simple glob expressions that are allowed by the filter files can also be used by the command line interface. ftrace_notrace=rcu*,*lock,*spin* Will not trace any function that starts with rcu, ends with lock, or has the word spin in it. Note, if the self tests are enabled, they may interfere with the filtering set by the command lines. Signed-off-by: Steven Rostedt --- Documentation/kernel-parameters.txt | 17 +++++++++++++-- kernel/trace/ftrace.c | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9243dd84f4d..fcd3bfbe74e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -751,12 +751,25 @@ and is between 256 and 4096 characters. It is defined in the file ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. ftrace=[tracer] - [ftrace] will set and start the specified tracer + [FTRACE] will set and start the specified tracer as early as possible in order to facilitate early boot debugging. ftrace_dump_on_oops - [ftrace] will dump the trace buffers on oops. + [FTRACE] will dump the trace buffers on oops. + + ftrace_filter=[function-list] + [FTRACE] Limit the functions traced by the function + tracer at boot up. function-list is a comma separated + list of functions. This list can be changed at run + time by the set_ftrace_filter file in the debugfs + tracing directory. + + ftrace_notrace=[function-list] + [FTRACE] Do not trace the functions specified in + function-list. This list can be changed at run time + by the set_ftrace_notrace file in the debugfs + tracing directory. gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 140699a9a8a..2074e5b7766 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,6 +32,7 @@ #include #include +#include #include "trace_output.h" #include "trace_stat.h" @@ -2369,6 +2370,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) ftrace_set_regex(buf, len, reset, 0); } +/* + * command line interface to allow users to set filters on boot up. + */ +#define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE +static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; +static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; + +static int __init set_ftrace_notrace(char *str) +{ + strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_notrace=", set_ftrace_notrace); + +static int __init set_ftrace_filter(char *str) +{ + strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_filter=", set_ftrace_filter); + +static void __init set_ftrace_early_filter(char *buf, int enable) +{ + char *func; + + while (buf) { + func = strsep(&buf, ","); + ftrace_set_regex(func, strlen(func), 0, enable); + } +} + +static void __init set_ftrace_early_filters(void) +{ + if (ftrace_filter_buf[0]) + set_ftrace_early_filter(ftrace_filter_buf, 1); + if (ftrace_notrace_buf[0]) + set_ftrace_early_filter(ftrace_notrace_buf, 0); +} + static int ftrace_regex_release(struct inode *inode, struct file *file, int enable) { @@ -2829,6 +2869,8 @@ void __init ftrace_init(void) if (ret) pr_warning("Failed to register trace ftrace module notifier\n"); + set_ftrace_early_filters(); + return; failed: ftrace_disabled = 1; -- cgit v1.2.3-70-g09d2 From 0f6ce3de4ef6ff940308087c49760d068851c1a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Jun 2009 21:51:28 -0400 Subject: ftrace: do not profile functions when disabled A race was found that if one were to enable and disable the function profiler repeatedly, then the system can panic. This was because a profiled function may be preempted just before disabling interrupts. While the profiler is disabled and then reenabled, the preempted function could start again, and access the hash as it is being initialized. This just adds a check in the irq disabled part to check if the profiler is enabled, and if it is not then it will just exit. When the system is disabled, the profile_enabled variable is cleared before calling the unregistering of the function profiler. This unregistering calls stop machine which also acts as a synchronize schedule. [ Impact: fix panic in enabling/disabling function profiler ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2074e5b7766..d6973dfadb3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -599,7 +599,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) local_irq_save(flags); stat = &__get_cpu_var(ftrace_profile_stats); - if (!stat->hash) + if (!stat->hash || !ftrace_profile_enabled) goto out; rec = ftrace_find_profiled_func(stat, ip); @@ -630,7 +630,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) local_irq_save(flags); stat = &__get_cpu_var(ftrace_profile_stats); - if (!stat->hash) + if (!stat->hash || !ftrace_profile_enabled) goto out; calltime = trace->rettime - trace->calltime; @@ -724,6 +724,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 1; } else { ftrace_profile_enabled = 0; + /* + * unregister_ftrace_profiler calls stop_machine + * so this acts like an synchronize_sched. + */ unregister_ftrace_profiler(); } } -- cgit v1.2.3-70-g09d2 From 179c498ae2998461fe436437a74dc29036fc7dcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 12:03:19 -0400 Subject: function-graph: only allocate init tasks if it was not already done When the function graph tracer is enabled, it calls the initialization needed for the init tasks that would be called on all created tasks. The problem is that this is called every time the function graph tracer is enabled, and the ret_stack is allocated for the idle tasks each time. Thus, the old ret_stack is lost and a memory leak is created. This is also dangerous because if an interrupt happened on another CPU with the init task and the ret_stack is replaced, we then lose all the return pointers for the interrupt, and a crash would take place. [ Impact: fix memory leak and possible crash due to race ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f1ed080406c..ebff62ef40b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2643,8 +2643,10 @@ static int start_graph_tracing(void) return -ENOMEM; /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) - ftrace_graph_init_task(idle_task(cpu)); + for_each_online_cpu(cpu) { + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_task(idle_task(cpu)); + } do { ret = alloc_retstack_tasklist(ret_stack_list); -- cgit v1.2.3-70-g09d2 From 82310a3272d5a2a7652f5649ad8a55f58c8f74d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 12:26:07 -0400 Subject: function-graph: enable the stack after initialization of other variables The function graph tracer checks if the task_struct has ret_stack defined to know if it is OK or not to use it. The initialization is done for all tasks by one process, but the idle tasks use the same initialization used by new tasks. If an interrupt happens on an idle task that just had the ret_stack created, but before the rest of the initialization took place, then we can corrupt the return address of the functions. This patch moves the setting of the task_struct's ret_stack to after the other variables have been initialized. [ Impact: prevent kernel panic on idle task when starting function graph ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 9 +++++++-- kernel/trace/trace_functions_graph.c | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ebff62ef40b..20e066065eb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2739,15 +2739,20 @@ void unregister_ftrace_graph(void) void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_graph_active)) { - t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + struct ftrace_ret_stack *ret_stack; + + ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); - if (!t->ret_stack) + if (!ret_stack) return; t->curr_ret_stack = -1; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; + /* make curr_ret_stack visable before we add the ret_stack */ + smp_wmb(); + t->ret_stack = ret_stack; } else t->ret_stack = NULL; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e7b3a..baeb5fe3610 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) if (!current->ret_stack) return -EBUSY; + /* + * We must make sure the ret_stack is tested before we read + * anything else. + */ + smp_rmb(); + /* The return trace stack is full */ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { atomic_inc(¤t->trace_overrun); -- cgit v1.2.3-70-g09d2 From 26c01624a2a40f8a4ddf6449b65c9b1c418d0e72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 14:01:19 -0400 Subject: function-graph: add memory barriers for accessing task's ret_stack The code that handles the tasks ret_stack allocation for every task assumes that only an interrupt can cause issues (even though interrupts are disabled). In reality, the code is allocating the ret_stack for tasks that may be running on other CPUs and there are not efficient memory barriers to handle this case. [ Impact: prevent crash due to using of uninitialized ret_stack variables ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 20e066065eb..1664d3f33d3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2580,12 +2580,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - t->curr_ret_stack = -1; - /* Make sure IRQs see the -1 first: */ - barrier(); - t->ret_stack = ret_stack_list[start++]; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); + t->curr_ret_stack = -1; + /* Make sure the tasks see the -1 first: */ + smp_wmb(); + t->ret_stack = ret_stack_list[start++]; } } while_each_thread(g, t); -- cgit v1.2.3-70-g09d2 From 84047e360af0394ac5861d433f26bbcf30f77dd1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 16:51:55 -0400 Subject: function-graph: always initialize task ret_stack On creating a new task while running the function graph tracer, if we fail to allocate the ret_stack, and then fail the fork, the code will free the parent ret_stack. This is because the child duplicated the parent and currently points to the parent's ret_stack. This patch always initializes the task's ret_stack to NULL. [ Impact: prevent crash of parent on low memory during fork ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1664d3f33d3..bb081f37cac 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2738,6 +2738,9 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { + /* Make sure we do not use the parent ret_stack */ + t->ret_stack = NULL; + if (atomic_read(&ftrace_graph_active)) { struct ftrace_ret_stack *ret_stack; @@ -2753,8 +2756,7 @@ void ftrace_graph_init_task(struct task_struct *t) /* make curr_ret_stack visable before we add the ret_stack */ smp_wmb(); t->ret_stack = ret_stack; - } else - t->ret_stack = NULL; + } } void ftrace_graph_exit_task(struct task_struct *t) -- cgit v1.2.3-70-g09d2 From 9ea1a153a4fb435c22e9988784bb476671286112 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Jun 2009 06:52:21 +0200 Subject: tracing/urgent: warn in case of ftrace_start_up inbalance Prevent from further ftrace_start_up inbalances so that we avoid future nop patching omissions with dynamic ftrace. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt --- kernel/trace/ftrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bb60732ade0..3718d55fb4c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1224,6 +1224,13 @@ static void ftrace_shutdown(int command) return; ftrace_start_up--; + /* + * Just warn in case of unbalance, no need to kill ftrace, it's not + * critical but the ftrace_call callers may be never nopped again after + * further ftrace uses. + */ + WARN_ON_ONCE(ftrace_start_up < 0); + if (!ftrace_start_up) command |= FTRACE_DISABLE_CALLS; -- cgit v1.2.3-70-g09d2