diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/ftrace.c | 849 | ||||
-rw-r--r-- | kernel/trace/trace.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace.h | 38 | ||||
-rw-r--r-- | kernel/trace/trace_entries.h | 54 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 208 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 446 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 64 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 30 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 22 |
12 files changed, 1283 insertions, 468 deletions
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b1e8943fed1..867bd1dd2dd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -22,11 +22,13 @@ #include <linux/hardirq.h> #include <linux/kthread.h> #include <linux/uaccess.h> +#include <linux/bsearch.h> #include <linux/module.h> #include <linux/ftrace.h> #include <linux/sysctl.h> #include <linux/slab.h> #include <linux/ctype.h> +#include <linux/sort.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/rcupdate.h> @@ -60,6 +62,8 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 +#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; @@ -87,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { }; static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; +static struct ftrace_ops control_ops; static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); @@ -166,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) } #endif +static void control_ops_disable_all(struct ftrace_ops *ops) +{ + int cpu; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(ops->disabled, cpu) = 1; +} + +static int control_ops_alloc(struct ftrace_ops *ops) +{ + int __percpu *disabled; + + disabled = alloc_percpu(int); + if (!disabled) + return -ENOMEM; + + ops->disabled = disabled; + control_ops_disable_all(ops); + return 0; +} + +static void control_ops_free(struct ftrace_ops *ops) +{ + free_percpu(ops->disabled); +} + static void update_global_ops(void) { ftrace_func_t func; @@ -257,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) return 0; } +static void add_ftrace_list_ops(struct ftrace_ops **list, + struct ftrace_ops *main_ops, + struct ftrace_ops *ops) +{ + int first = *list == &ftrace_list_end; + add_ftrace_ops(list, ops); + if (first) + add_ftrace_ops(&ftrace_ops_list, main_ops); +} + +static int remove_ftrace_list_ops(struct ftrace_ops **list, + struct ftrace_ops *main_ops, + struct ftrace_ops *ops) +{ + int ret = remove_ftrace_ops(list, ops); + if (!ret && *list == &ftrace_list_end) + ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); + return ret; +} + static int __register_ftrace_function(struct ftrace_ops *ops) { if (ftrace_disabled) @@ -268,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EBUSY; + /* We don't support both control and global flags set. */ + if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) + return -EINVAL; + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - int first = ftrace_global_list == &ftrace_list_end; - add_ftrace_ops(&ftrace_global_list, ops); + add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); ops->flags |= FTRACE_OPS_FL_ENABLED; - if (first) - add_ftrace_ops(&ftrace_ops_list, &global_ops); + } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + if (control_ops_alloc(ops)) + return -ENOMEM; + add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); } else add_ftrace_ops(&ftrace_ops_list, ops); @@ -300,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) return -EINVAL; if (ops->flags & FTRACE_OPS_FL_GLOBAL) { - ret = remove_ftrace_ops(&ftrace_global_list, ops); - if (!ret && ftrace_global_list == &ftrace_list_end) - ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); + ret = remove_ftrace_list_ops(&ftrace_global_list, + &global_ops, ops); if (!ret) ops->flags &= ~FTRACE_OPS_FL_ENABLED; + } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { + ret = remove_ftrace_list_ops(&ftrace_control_list, + &control_ops, ops); + if (!ret) { + /* + * The ftrace_ops is now removed from the list, + * so there'll be no new users. We must ensure + * all current users are done before we free + * the control data. + */ + synchronize_sched(); + control_ops_free(ops); + } } else ret = remove_ftrace_ops(&ftrace_ops_list, ops); @@ -947,13 +1016,6 @@ struct ftrace_func_probe { struct rcu_head rcu; }; -enum { - FTRACE_ENABLE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_START_FUNC_RET = (1 << 3), - FTRACE_STOP_FUNC_RET = (1 << 4), -}; struct ftrace_func_entry { struct hlist_node hlist; unsigned long ip; @@ -984,18 +1046,19 @@ static struct ftrace_ops global_ops = { .filter_hash = EMPTY_HASH, }; -static struct dyn_ftrace *ftrace_new_addrs; - static DEFINE_MUTEX(ftrace_regex_lock); struct ftrace_page { struct ftrace_page *next; + struct dyn_ftrace *records; int index; - struct dyn_ftrace records[]; + int size; }; -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) +static struct ftrace_page *ftrace_new_pgs; + +#define ENTRY_SIZE sizeof(struct dyn_ftrace) +#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) /* estimate from running different kernels */ #define NR_TO_INIT 10000 @@ -1003,7 +1066,10 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static struct dyn_ftrace *ftrace_free_records; +static bool ftrace_hash_empty(struct ftrace_hash *hash) +{ + return !hash || !hash->count; +} static struct ftrace_func_entry * ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) @@ -1013,7 +1079,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) struct hlist_head *hhd; struct hlist_node *n; - if (!hash->count) + if (ftrace_hash_empty(hash)) return NULL; if (hash->size_bits > 0) @@ -1120,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); } +void ftrace_free_filter(struct ftrace_ops *ops) +{ + free_ftrace_hash(ops->filter_hash); + free_ftrace_hash(ops->notrace_hash); +} + static struct ftrace_hash *alloc_ftrace_hash(int size_bits) { struct ftrace_hash *hash; @@ -1130,7 +1202,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits) return NULL; size = 1 << size_bits; - hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL); + hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); if (!hash->buckets) { kfree(hash); @@ -1157,7 +1229,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) return NULL; /* Empty hash? */ - if (!hash || !hash->count) + if (ftrace_hash_empty(hash)) return new_hash; size = 1 << hash->size_bits; @@ -1282,9 +1354,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) filter_hash = rcu_dereference_raw(ops->filter_hash); notrace_hash = rcu_dereference_raw(ops->notrace_hash); - if ((!filter_hash || !filter_hash->count || + if ((ftrace_hash_empty(filter_hash) || ftrace_lookup_ip(filter_hash, ip)) && - (!notrace_hash || !notrace_hash->count || + (ftrace_hash_empty(notrace_hash) || !ftrace_lookup_ip(notrace_hash, ip))) ret = 1; else @@ -1307,6 +1379,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) } \ } + +static int ftrace_cmp_recs(const void *a, const void *b) +{ + const struct dyn_ftrace *reca = a; + const struct dyn_ftrace *recb = b; + + if (reca->ip > recb->ip) + return 1; + if (reca->ip < recb->ip) + return -1; + return 0; +} + +/** + * ftrace_location - return true if the ip giving is a traced location + * @ip: the instruction pointer to check + * + * Returns 1 if @ip given is a pointer to a ftrace location. + * That is, the instruction that is either a NOP or call to + * the function tracer. It checks the ftrace internal tables to + * determine if the address belongs or not. + */ +int ftrace_location(unsigned long ip) +{ + struct ftrace_page *pg; + struct dyn_ftrace *rec; + struct dyn_ftrace key; + + key.ip = ip; + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + rec = bsearch(&key, pg->records, pg->index, + sizeof(struct dyn_ftrace), + ftrace_cmp_recs); + if (rec) + return 1; + } + + return 0; +} + static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1336,7 +1449,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (filter_hash) { hash = ops->filter_hash; other_hash = ops->notrace_hash; - if (!hash || !hash->count) + if (ftrace_hash_empty(hash)) all = 1; } else { inc = !inc; @@ -1346,7 +1459,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * If the notrace hash has no items, * then there's nothing to do. */ - if (hash && !hash->count) + if (ftrace_hash_empty(hash)) return; } @@ -1363,8 +1476,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) match = 1; } else { - in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); - in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); + in_hash = !!ftrace_lookup_ip(hash, rec->ip); + in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); /* * @@ -1372,7 +1485,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, if (filter_hash && in_hash && !in_other_hash) match = 1; else if (!filter_hash && in_hash && - (in_other_hash || !other_hash->count)) + (in_other_hash || ftrace_hash_empty(other_hash))) match = 1; } if (!match) @@ -1406,40 +1519,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops, __ftrace_hash_rec_update(ops, filter_hash, 1); } -static void ftrace_free_rec(struct dyn_ftrace *rec) -{ - rec->freelist = ftrace_free_records; - ftrace_free_records = rec; - rec->flags |= FTRACE_FL_FREE; -} - static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { - struct dyn_ftrace *rec; - - /* First check for freed records */ - if (ftrace_free_records) { - rec = ftrace_free_records; - - if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { - FTRACE_WARN_ON_ONCE(1); - ftrace_free_records = NULL; + if (ftrace_pages->index == ftrace_pages->size) { + /* We should have allocated enough */ + if (WARN_ON(!ftrace_pages->next)) return NULL; - } - - ftrace_free_records = rec->freelist; - memset(rec, 0, sizeof(*rec)); - return rec; - } - - if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) { - /* allocate another page */ - ftrace_pages->next = - (void *)get_zeroed_page(GFP_KERNEL); - if (!ftrace_pages->next) - return NULL; - } ftrace_pages = ftrace_pages->next; } @@ -1459,8 +1544,6 @@ ftrace_record_ip(unsigned long ip) return NULL; rec->ip = ip; - rec->newlist = ftrace_new_addrs; - ftrace_new_addrs = rec; return rec; } @@ -1475,7 +1558,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } -static void ftrace_bug(int failed, unsigned long ip) +/** + * ftrace_bug - report and shutdown function tracer + * @failed: The failed type (EFAULT, EINVAL, EPERM) + * @ip: The address that failed + * + * The arch code that enables or disables the function tracing + * can call ftrace_bug() when it has detected a problem in + * modifying the code. @failed should be one of either: + * EFAULT - if the problem happens on reading the @ip address + * EINVAL - if what is read at @ip is not what was expected + * EPERM - if the problem happens on writting to the @ip address + */ +void ftrace_bug(int failed, unsigned long ip) { switch (failed) { case -EFAULT: @@ -1517,24 +1612,19 @@ int ftrace_text_reserved(void *start, void *end) return 0; } - -static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) { - unsigned long ftrace_addr; unsigned long flag = 0UL; - ftrace_addr = (unsigned long)FTRACE_ADDR; - /* - * If we are enabling tracing: + * If we are updating calls: * * If the record has a ref count, then we need to enable it * because someone is using it. * * Otherwise we make sure its disabled. * - * If we are disabling tracing, then disable all records that + * If we are disabling calls, then disable all records that * are enabled. */ if (enable && (rec->flags & ~FTRACE_FL_MASK)) @@ -1542,18 +1632,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) - return 0; + return FTRACE_UPDATE_IGNORE; if (flag) { - rec->flags |= FTRACE_FL_ENABLED; + if (update) + rec->flags |= FTRACE_FL_ENABLED; + return FTRACE_UPDATE_MAKE_CALL; + } + + if (update) + rec->flags &= ~FTRACE_FL_ENABLED; + + return FTRACE_UPDATE_MAKE_NOP; +} + +/** + * ftrace_update_record, set a record that now is tracing or not + * @rec: the record to update + * @enable: set to 1 if the record is tracing, zero to force disable + * + * The records that represent all functions that can be traced need + * to be updated when tracing has been enabled. + */ +int ftrace_update_record(struct dyn_ftrace *rec, int enable) +{ + return ftrace_check_record(rec, enable, 1); +} + +/** + * ftrace_test_record, check if the record has been enabled or not + * @rec: the record to test + * @enable: set to 1 to check if enabled, 0 if it is disabled + * + * The arch code may need to test if a record is already set to + * tracing to determine how to modify the function code that it + * represents. + */ +int ftrace_test_record(struct dyn_ftrace *rec, int enable) +{ + return ftrace_check_record(rec, enable, 0); +} + +static int +__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +{ + unsigned long ftrace_addr; + int ret; + + ftrace_addr = (unsigned long)FTRACE_ADDR; + + ret = ftrace_update_record(rec, enable); + + switch (ret) { + case FTRACE_UPDATE_IGNORE: + return 0; + + case FTRACE_UPDATE_MAKE_CALL: return ftrace_make_call(rec, ftrace_addr); + + case FTRACE_UPDATE_MAKE_NOP: + return ftrace_make_nop(NULL, rec, ftrace_addr); } - rec->flags &= ~FTRACE_FL_ENABLED; - return ftrace_make_nop(NULL, rec, ftrace_addr); + return -1; /* unknow ftrace bug */ } -static void ftrace_replace_code(int enable) +static void ftrace_replace_code(int update) { struct dyn_ftrace *rec; struct ftrace_page *pg; @@ -1563,11 +1707,7 @@ static void ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { - /* Skip over free records */ - if (rec->flags & FTRACE_FL_FREE) - continue; - - failed = __ftrace_replace_code(rec, enable); + failed = __ftrace_replace_code(rec, update); if (failed) { ftrace_bug(failed, rec->ip); /* Stop processing */ @@ -1576,6 +1716,78 @@ static void ftrace_replace_code(int enable) } while_for_each_ftrace_rec(); } +struct ftrace_rec_iter { + struct ftrace_page *pg; + int index; +}; + +/** + * ftrace_rec_iter_start, start up iterating over traced functions + * + * Returns an iterator handle that is used to iterate over all + * the records that represent address locations where functions + * are traced. + * + * May return NULL if no records are available. + */ +struct ftrace_rec_iter *ftrace_rec_iter_start(void) +{ + /* + * We only use a single iterator. + * Protected by the ftrace_lock mutex. + */ + static struct ftrace_rec_iter ftrace_rec_iter; + struct ftrace_rec_iter *iter = &ftrace_rec_iter; + + iter->pg = ftrace_pages_start; + iter->index = 0; + + /* Could have empty pages */ + while (iter->pg && !iter->pg->index) + iter->pg = iter->pg->next; + + if (!iter->pg) + return NULL; + + return iter; +} + +/** + * ftrace_rec_iter_next, get the next record to process. + * @iter: The handle to the iterator. + * + * Returns the next iterator after the given iterator @iter. + */ +struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter) +{ + iter->index++; + + if (iter->index >= iter->pg->index) { + iter->pg = iter->pg->next; + iter->index = 0; + + /* Could have empty pages */ + while (iter->pg && !iter->pg->index) + iter->pg = iter->pg->next; + } + + if (!iter->pg) + return NULL; + + return iter; +} + +/** + * ftrace_rec_iter_record, get the record at the iterator location + * @iter: The current iterator location + * + * Returns the record that the current @iter is at. + */ +struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) +{ + return &iter->pg->records[iter->index]; +} + static int ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { @@ -1617,13 +1829,7 @@ static int __ftrace_modify_code(void *data) { int *command = data; - /* - * Do not call function tracer while we update the code. - * We are in stop machine, no worrying about races. - */ - function_trace_stop++; - - if (*command & FTRACE_ENABLE_CALLS) + if (*command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); @@ -1636,21 +1842,33 @@ static int __ftrace_modify_code(void *data) else if (*command & FTRACE_STOP_FUNC_RET) ftrace_disable_ftrace_graph_caller(); -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* - * For archs that call ftrace_test_stop_func(), we must - * wait till after we update all the function callers - * before we update the callback. This keeps different - * ops that record different functions from corrupting - * each other. - */ - __ftrace_trace_function = __ftrace_trace_function_delay; -#endif - function_trace_stop--; - return 0; } +/** + * ftrace_run_stop_machine, go back to the stop machine method + * @command: The command to tell ftrace what to do + * + * If an arch needs to fall back to the stop machine method, the + * it can call this function. + */ +void ftrace_run_stop_machine(int command) +{ + stop_machine(__ftrace_modify_code, &command, NULL); +} + +/** + * arch_ftrace_update_code, modify the code to trace or not trace + * @command: The command that needs to be done + * + * Archs can override this function if it does not need to + * run stop_machine() to modify code. + */ +void __weak arch_ftrace_update_code(int command) +{ + ftrace_run_stop_machine(command); +} + static void ftrace_run_update_code(int command) { int ret; @@ -1659,8 +1877,31 @@ static void ftrace_run_update_code(int command) FTRACE_WARN_ON(ret); if (ret) return; + /* + * Do not call function tracer while we update the code. + * We are in stop machine. + */ + function_trace_stop++; - stop_machine(__ftrace_modify_code, &command, NULL); + /* + * By default we use stop_machine() to modify the code. + * But archs can do what ever they want as long as it + * is safe. The stop_machine() is the safest, but also + * produces the most overhead. + */ + arch_ftrace_update_code(command); + +#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + /* + * For archs that call ftrace_test_stop_func(), we must + * wait till after we update all the function callers + * before we update the callback. This keeps different + * ops that record different functions from corrupting + * each other. + */ + __ftrace_trace_function = __ftrace_trace_function_delay; +#endif + function_trace_stop--; ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); @@ -1691,7 +1932,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) return -ENODEV; ftrace_start_up++; - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_UPDATE_CALLS; /* ops marked global share the filter hashes */ if (ops->flags & FTRACE_OPS_FL_GLOBAL) { @@ -1743,8 +1984,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) if (ops != &global_ops || !global_start_up) ops->flags &= ~FTRACE_OPS_FL_ENABLED; - if (!ftrace_start_up) - command |= FTRACE_DISABLE_CALLS; + command |= FTRACE_UPDATE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -1766,7 +2006,7 @@ static void ftrace_startup_sysctl(void) saved_ftrace_func = NULL; /* ftrace_start_up is true if we want ftrace running */ if (ftrace_start_up) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + ftrace_run_update_code(FTRACE_UPDATE_CALLS); } static void ftrace_shutdown_sysctl(void) @@ -1788,14 +2028,16 @@ static int ops_traces_mod(struct ftrace_ops *ops) struct ftrace_hash *hash; hash = ops->filter_hash; - return !!(!hash || !hash->count); + return ftrace_hash_empty(hash); } static int ftrace_update_code(struct module *mod) { + struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; unsigned long ref = 0; + int i; /* * When adding a module, we need to check if tracers are @@ -1817,46 +2059,44 @@ static int ftrace_update_code(struct module *mod) start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - while (ftrace_new_addrs) { + for (pg = ftrace_new_pgs; pg; pg = pg->next) { - /* If something went wrong, bail without enabling anything */ - if (unlikely(ftrace_disabled)) - return -1; + for (i = 0; i < pg->index; i++) { + /* If something went wrong, bail without enabling anything */ + if (unlikely(ftrace_disabled)) + return -1; - p = ftrace_new_addrs; - ftrace_new_addrs = p->newlist; - p->flags = ref; + p = &pg->records[i]; + p->flags = ref; - /* - * Do the initial record conversion from mcount jump - * to the NOP instructions. - */ - if (!ftrace_code_disable(mod, p)) { - ftrace_free_rec(p); - /* Game over */ - break; - } + /* + * Do the initial record conversion from mcount jump + * to the NOP instructions. + */ + if (!ftrace_code_disable(mod, p)) + break; - ftrace_update_cnt++; + ftrace_update_cnt++; - /* - * If the tracing is enabled, go ahead and enable the record. - * - * The reason not to enable the record immediatelly is the - * inherent check of ftrace_make_nop/ftrace_make_call for - * correct previous instructions. Making first the NOP - * conversion puts the module to the correct state, thus - * passing the ftrace_make_call check. - */ - if (ftrace_start_up && ref) { - int failed = __ftrace_replace_code(p, 1); - if (failed) { - ftrace_bug(failed, p->ip); - ftrace_free_rec(p); + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + */ + if (ftrace_start_up && ref) { + int failed = __ftrace_replace_code(p, 1); + if (failed) + ftrace_bug(failed, p->ip); } } } + ftrace_new_pgs = NULL; + stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; @@ -1864,57 +2104,108 @@ static int ftrace_update_code(struct module *mod) return 0; } -static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) +static int ftrace_allocate_records(struct ftrace_page *pg, int count) { - struct ftrace_page *pg; + int order; int cnt; - int i; - /* allocate a few pages */ - ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); - if (!ftrace_pages_start) - return -1; + if (WARN_ON(!count)) + return -EINVAL; + + order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); /* - * Allocate a few more pages. - * - * TODO: have some parser search vmlinux before - * final linking to find all calls to ftrace. - * Then we can: - * a) know how many pages to allocate. - * and/or - * b) set up the table then. - * - * The dynamic code is still necessary for - * modules. + * We want to fill as much as possible. No more than a page + * may be empty. */ + while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) + order--; - pg = ftrace_pages = ftrace_pages_start; + again: + pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld entries in %d pages\n", - num_to_init, cnt + 1); + if (!pg->records) { + /* if we can't allocate this size, try something smaller */ + if (!order) + return -ENOMEM; + order >>= 1; + goto again; + } - for (i = 0; i < cnt; i++) { - pg->next = (void *)get_zeroed_page(GFP_KERNEL); + cnt = (PAGE_SIZE << order) / ENTRY_SIZE; + pg->size = cnt; - /* If we fail, we'll try later anyway */ - if (!pg->next) + if (cnt > count) + cnt = count; + + return cnt; +} + +static struct ftrace_page * +ftrace_allocate_pages(unsigned long num_to_init) +{ + struct ftrace_page *start_pg; + struct ftrace_page *pg; + int order; + int cnt; + + if (!num_to_init) + return 0; + + start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); + if (!pg) + return NULL; + + /* + * Try to allocate as much as possible in one continues + * location that fills in all of the space. We want to + * waste as little space as possible. + */ + for (;;) { + cnt = ftrace_allocate_records(pg, num_to_init); + if (cnt < 0) + goto free_pages; + + num_to_init -= cnt; + if (!num_to_init) break; + pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); + if (!pg->next) + goto free_pages; + pg = pg->next; } - return 0; + return start_pg; + + free_pages: + while (start_pg) { + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); + start_pg = pg->next; + kfree(pg); + pg = start_pg; + } + pr_info("ftrace: FAILED to allocate memory for functions\n"); + return NULL; } -enum { - FTRACE_ITER_FILTER = (1 << 0), - FTRACE_ITER_NOTRACE = (1 << 1), - FTRACE_ITER_PRINTALL = (1 << 2), - FTRACE_ITER_HASH = (1 << 3), - FTRACE_ITER_ENABLED = (1 << 4), -}; +static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) +{ + int cnt; + + if (!num_to_init) { + pr_info("ftrace: No functions to be traced?\n"); + return -1; + } + + cnt = num_to_init / ENTRIES_PER_PAGE; + pr_info("ftrace: allocating %ld entries in %d pages\n", + num_to_init, cnt + 1); + + return 0; +} #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ @@ -1980,6 +2271,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) void *p = NULL; loff_t l; + if (!(iter->flags & FTRACE_ITER_DO_HASH)) + return NULL; + if (iter->func_pos > *pos) return NULL; @@ -2023,7 +2317,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = &global_ops; + struct ftrace_ops *ops = iter->ops; struct dyn_ftrace *rec = NULL; if (unlikely(ftrace_disabled)) @@ -2047,9 +2341,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } } else { rec = &iter->pg->records[iter->idx++]; - if ((rec->flags & FTRACE_FL_FREE) || - - ((iter->flags & FTRACE_ITER_FILTER) && + if (((iter->flags & FTRACE_ITER_FILTER) && !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || ((iter->flags & FTRACE_ITER_NOTRACE) && @@ -2081,7 +2373,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = &global_ops; + struct ftrace_ops *ops = iter->ops; void *p = NULL; loff_t l; @@ -2101,7 +2393,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { + if (iter->flags & FTRACE_ITER_FILTER && + ftrace_hash_empty(ops->filter_hash)) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -2126,12 +2419,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) break; } - if (!p) { - if (iter->flags & FTRACE_ITER_FILTER) - return t_hash_start(m, pos); - - return NULL; - } + if (!p) + return t_hash_start(m, pos); return iter; } @@ -2189,6 +2478,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENOMEM; iter->pg = ftrace_pages_start; + iter->ops = &global_ops; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -2217,6 +2507,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file) iter->pg = ftrace_pages_start; iter->flags = FTRACE_ITER_ENABLED; + iter->ops = &global_ops; ret = seq_open(file, &show_ftrace_seq_ops); if (!ret) { @@ -2237,7 +2528,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) mutex_unlock(&ftrace_lock); } -static int +/** + * ftrace_regex_open - initialize function tracer filter files + * @ops: The ftrace_ops that hold the hash filters + * @flag: The type of filter to process + * @inode: The inode, usually passed in to your open routine + * @file: The file, usually passed in to your open routine + * + * ftrace_regex_open() initializes the filter files for the + * @ops. Depending on @flag it may process the filter hash or + * the notrace hash of @ops. With this called from the open + * routine, you can use ftrace_filter_write() for the write + * routine if @flag has FTRACE_ITER_FILTER set, or + * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. + * ftrace_regex_lseek() should be used as the lseek routine, and + * release must call ftrace_regex_release(). + */ +int ftrace_regex_open(struct ftrace_ops *ops, int flag, struct inode *inode, struct file *file) { @@ -2306,8 +2613,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, static int ftrace_filter_open(struct inode *inode, struct file *file) { - return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, - inode, file); + return ftrace_regex_open(&global_ops, + FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, + inode, file); } static int @@ -2317,7 +2625,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) inode, file); } -static loff_t +loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin) { loff_t ret; @@ -2426,7 +2734,6 @@ match_records(struct ftrace_hash *hash, char *buff, goto out_unlock; do_for_each_ftrace_rec(pg, rec) { - if (ftrace_match_record(rec, mod, search, search_len, type)) { ret = enter_record(hash, rec, not); if (ret < 0) { @@ -2871,14 +3178,14 @@ out_unlock: return ret; } -static ssize_t +ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { return ftrace_regex_write(file, ubuf, cnt, ppos, 1); } -static ssize_t +ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -2912,17 +3219,20 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, mutex_lock(&ftrace_regex_lock); if (reset) ftrace_filter_reset(hash); - if (buf) - ftrace_match_records(hash, buf, len); + if (buf && !ftrace_match_records(hash, buf, len)) { + ret = -EINVAL; + goto out_regex_unlock; + } mutex_lock(&ftrace_lock); ret = ftrace_hash_move(ops, enable, orig_hash, hash); if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + ftrace_run_update_code(FTRACE_UPDATE_CALLS); mutex_unlock(&ftrace_lock); + out_regex_unlock: mutex_unlock(&ftrace_regex_lock); free_ftrace_hash(hash); @@ -2939,10 +3249,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. */ -void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, +int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) { - ftrace_set_regex(ops, buf, len, reset, 1); + return ftrace_set_regex(ops, buf, len, reset, 1); } EXPORT_SYMBOL_GPL(ftrace_set_filter); @@ -2957,10 +3267,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. */ -void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, +int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) { - ftrace_set_regex(ops, buf, len, reset, 0); + return ftrace_set_regex(ops, buf, len, reset, 0); } EXPORT_SYMBOL_GPL(ftrace_set_notrace); /** @@ -3045,8 +3355,8 @@ static void __init set_ftrace_early_graph(char *buf) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static void __init -set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) +void __init +ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) { char *func; @@ -3059,17 +3369,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) static void __init set_ftrace_early_filters(void) { if (ftrace_filter_buf[0]) - set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); + ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) - set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); + ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_buf[0]) set_ftrace_early_graph(ftrace_graph_buf); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ } -static int -ftrace_regex_release(struct inode *inode, struct file *file) +int ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; struct ftrace_iterator *iter; @@ -3107,7 +3416,7 @@ ftrace_regex_release(struct inode *inode, struct file *file) orig_hash, iter->hash); if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + ftrace_run_update_code(FTRACE_UPDATE_CALLS); mutex_unlock(&ftrace_lock); } @@ -3270,9 +3579,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) do_for_each_ftrace_rec(pg, rec) { - if (rec->flags & FTRACE_FL_FREE) - continue; - if (ftrace_match_record(rec, NULL, search, search_len, type)) { /* if it is in the array */ exists = false; @@ -3381,15 +3687,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) return 0; } +static void ftrace_swap_recs(void *a, void *b, int size) +{ + struct dyn_ftrace *reca = a; + struct dyn_ftrace *recb = b; + struct dyn_ftrace t; + + t = *reca; + *reca = *recb; + *recb = t; +} + static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { + struct ftrace_page *pg; + unsigned long count; unsigned long *p; unsigned long addr; unsigned long flags = 0; /* Shut up gcc */ + int ret = -ENOMEM; + + count = end - start; + + if (!count) + return 0; + + pg = ftrace_allocate_pages(count); + if (!pg) + return -ENOMEM; mutex_lock(&ftrace_lock); + + /* + * Core and each module needs their own pages, as + * modules will free them when they are removed. + * Force a new page to be allocated for modules. + */ + if (!mod) { + WARN_ON(ftrace_pages || ftrace_pages_start); + /* First initialization */ + ftrace_pages = ftrace_pages_start = pg; + } else { + if (!ftrace_pages) + goto out; + + if (WARN_ON(ftrace_pages->next)) { + /* Hmm, we have free pages? */ + while (ftrace_pages->next) + ftrace_pages = ftrace_pages->next; + } + + ftrace_pages->next = pg; + ftrace_pages = pg; + } + p = start; while (p < end) { addr = ftrace_call_adjust(*p++); @@ -3401,9 +3754,18 @@ static int ftrace_process_locs(struct module *mod, */ if (!addr) continue; - ftrace_record_ip(addr); + if (!ftrace_record_ip(addr)) + break; } + /* These new locations need to be initialized */ + ftrace_new_pgs = pg; + + /* Make each individual set of pages sorted by ips */ + for (; pg; pg = pg->next) + sort(pg->records, pg->index, sizeof(struct dyn_ftrace), + ftrace_cmp_recs, ftrace_swap_recs); + /* * We only need to disable interrupts on start up * because we are modifying code that an interrupt @@ -3417,32 +3779,55 @@ static int ftrace_process_locs(struct module *mod, ftrace_update_code(mod); if (!mod) local_irq_restore(flags); + ret = 0; + out: mutex_unlock(&ftrace_lock); - return 0; + return ret; } #ifdef CONFIG_MODULES + +#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) + void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; + struct ftrace_page **last_pg; struct ftrace_page *pg; + int order; mutex_lock(&ftrace_lock); if (ftrace_disabled) goto out_unlock; - do_for_each_ftrace_rec(pg, rec) { + /* + * Each module has its own ftrace_pages, remove + * them from the list. + */ + last_pg = &ftrace_pages_start; + for (pg = ftrace_pages_start; pg; pg = *last_pg) { + rec = &pg->records[0]; if (within_module_core(rec->ip, mod)) { /* - * rec->ip is changed in ftrace_free_rec() - * It should not between s and e if record was freed. + * As core pages are first, the first + * page should never be a module page. */ - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); - ftrace_free_rec(rec); - } - } while_for_each_ftrace_rec(); + if (WARN_ON(pg == ftrace_pages_start)) + goto out_unlock; + + /* Check if we are deleting the last page */ + if (pg == ftrace_pages) + ftrace_pages = next_to_ftrace_page(last_pg); + + *last_pg = pg->next; + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); + kfree(pg); + } else + last_pg = &pg->next; + } out_unlock: mutex_unlock(&ftrace_lock); } @@ -3562,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) #endif /* CONFIG_DYNAMIC_FTRACE */ static void +ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_ops *op; + + if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) + return; + + /* + * Some of the ops may be dynamically allocated, + * they must be freed after a synchronize_sched(). + */ + preempt_disable_notrace(); + trace_recursion_set(TRACE_CONTROL_BIT); + op = rcu_dereference_raw(ftrace_control_list); + while (op != &ftrace_list_end) { + if (!ftrace_function_local_disabled(op) && + ftrace_ops_test(op, ip)) + op->func(ip, parent_ip); + + op = rcu_dereference_raw(op->next); + }; + trace_recursion_clear(TRACE_CONTROL_BIT); + preempt_enable_notrace(); +} + +static struct ftrace_ops control_ops = { + .func = ftrace_ops_control_func, +}; + +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3f1bc5d2a0..10d5503f0d0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2764,12 +2764,12 @@ static const char readme_msg[] = "tracing mini-HOWTO:\n\n" "# mount -t debugfs nodev /sys/kernel/debug\n\n" "# cat /sys/kernel/debug/tracing/available_tracers\n" - "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" + "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" "# cat /sys/kernel/debug/tracing/current_tracer\n" "nop\n" - "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" + "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" "# cat /sys/kernel/debug/tracing/current_tracer\n" - "sched_switch\n" + "wakeup\n" "# cat /sys/kernel/debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b93ecbadad6..54faec790bc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -56,17 +56,23 @@ enum trace_type { #define F_STRUCT(args...) args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ - struct struct_name { \ - struct trace_entry ent; \ - tstruct \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ + struct struct_name { \ + struct trace_entry ent; \ + tstruct \ } #undef TP_ARGS #define TP_ARGS(args...) args #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) +#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) + +#undef FTRACE_ENTRY_REG +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ + filter, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ + filter) #include "trace_entries.h" @@ -288,6 +294,8 @@ struct tracer { /* for function tracing recursion */ #define TRACE_INTERNAL_BIT (1<<11) #define TRACE_GLOBAL_BIT (1<<12) +#define TRACE_CONTROL_BIT (1<<13) + /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function @@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task) static inline int ftrace_is_dead(void) { return 0; } #endif +int ftrace_event_is_function(struct ftrace_event_call *call); + /* * struct trace_parser - servers for reading the user input separated by spaces * @cont: set if the input is not complete - no final space char was found @@ -766,9 +776,7 @@ struct filter_pred { u64 val; struct regex regex; unsigned short *ops; -#ifdef CONFIG_FTRACE_STARTUP_TEST struct ftrace_event_field *field; -#endif int offset; int not; int op; @@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; #undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ +#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ extern struct ftrace_event_call \ __attribute__((__aligned__(4))) event_##call; #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ - FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) +#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ + FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ + filter) #include "trace_entries.h" +#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_FUNCTION_TRACER +int perf_ftrace_event_register(struct ftrace_event_call *call, + enum trace_reg type, void *data); +#else +#define perf_ftrace_event_register NULL +#endif /* CONFIG_FUNCTION_TRACER */ +#endif /* CONFIG_PERF_EVENTS */ + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 93365907f21..d91eb0541b3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -55,7 +55,7 @@ /* * Function trace entry - function address and parent function address: */ -FTRACE_ENTRY(function, ftrace_entry, +FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, @@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry, __field( unsigned long, parent_ip ) ), - F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) + F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip), + + FILTER_TRACE_FN, + + perf_ftrace_event_register ); /* Function call entry */ @@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, __field_desc( int, graph_ent, depth ) ), - F_printk("--> %lx (%d)", __entry->func, __entry->depth) + F_printk("--> %lx (%d)", __entry->func, __entry->depth), + + FILTER_OTHER ); /* Function return entry */ @@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", __entry->func, __entry->depth, __entry->calltime, __entry->rettime, - __entry->depth) + __entry->depth), + + FILTER_OTHER ); /* @@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu - ) + __entry->next_cpu), + + FILTER_OTHER ); /* @@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", __entry->prev_pid, __entry->prev_prio, __entry->prev_state, __entry->next_pid, __entry->next_prio, __entry->next_state, - __entry->next_cpu - ) + __entry->next_cpu), + + FILTER_OTHER ); /* @@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], - __entry->caller[6], __entry->caller[7]) + __entry->caller[6], __entry->caller[7]), + + FILTER_OTHER ); FTRACE_ENTRY(user_stack, userstack_entry, @@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], - __entry->caller[6], __entry->caller[7]) + __entry->caller[6], __entry->caller[7]), + + FILTER_OTHER ); /* @@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry, ), F_printk("%08lx fmt:%p", - __entry->ip, __entry->fmt) + __entry->ip, __entry->fmt), + + FILTER_OTHER ); FTRACE_ENTRY(print, print_entry, @@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry, ), F_printk("%08lx %s", - __entry->ip, __entry->buf) + __entry->ip, __entry->buf), + + FILTER_OTHER ); FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, @@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, F_printk("%lx %lx %lx %d %x %x", (unsigned long)__entry->phys, __entry->value, __entry->pc, - __entry->map_id, __entry->opcode, __entry->width) + __entry->map_id, __entry->opcode, __entry->width), + + FILTER_OTHER ); FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, @@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, F_printk("%lx %lx %lx %d %x", (unsigned long)__entry->phys, __entry->virt, __entry->len, - __entry->map_id, __entry->opcode) + __entry->map_id, __entry->opcode), + + FILTER_OTHER ); @@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch, F_printk("%u:%s:%s (%u)", __entry->line, - __entry->func, __entry->file, __entry->correct) + __entry->func, __entry->file, __entry->correct), + + FILTER_OTHER ); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 19a359d5e6d..fee3752ae8f 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,6 +24,11 @@ static int total_ref_count; static int perf_trace_event_perm(struct ftrace_event_call *tp_event, struct perf_event *p_event) { + /* The ftrace function trace is allowed only for root. */ + if (ftrace_event_is_function(tp_event) && + perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) return 0; @@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, return 0; } -static int perf_trace_event_init(struct ftrace_event_call *tp_event, - struct perf_event *p_event) +static int perf_trace_event_reg(struct ftrace_event_call *tp_event, + struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret; + int ret = -ENOMEM; int cpu; - ret = perf_trace_event_perm(tp_event, p_event); - if (ret) - return ret; - p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; - ret = -ENOMEM; - list = alloc_percpu(struct hlist_head); if (!list) goto fail; @@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, } } - ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); + ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL); if (ret) goto fail; @@ -108,6 +107,69 @@ fail: return ret; } +static void perf_trace_event_unreg(struct perf_event *p_event) +{ + struct ftrace_event_call *tp_event = p_event->tp_event; + int i; + + if (--tp_event->perf_refcount > 0) + goto out; + + tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL); + + /* + * Ensure our callback won't be called anymore. The buffers + * will be freed after that. + */ + tracepoint_synchronize_unregister(); + + free_percpu(tp_event->perf_events); + tp_event->perf_events = NULL; + + if (!--total_ref_count) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; + } + } +out: + module_put(tp_event->mod); +} + +static int perf_trace_event_open(struct perf_event *p_event) +{ + struct ftrace_event_call *tp_event = p_event->tp_event; + return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); +} + +static void perf_trace_event_close(struct perf_event *p_event) +{ + struct ftrace_event_call *tp_event = p_event->tp_event; + tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); +} + +static int perf_trace_event_init(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + int ret; + + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + + ret = perf_trace_event_reg(tp_event, p_event); + if (ret) + return ret; + + ret = perf_trace_event_open(p_event); + if (ret) { + perf_trace_event_unreg(p_event); + return ret; + } + + return 0; +} + int perf_trace_init(struct perf_event *p_event) { struct ftrace_event_call *tp_event; @@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event) return ret; } +void perf_trace_destroy(struct perf_event *p_event) +{ + mutex_lock(&event_mutex); + perf_trace_event_close(p_event); + perf_trace_event_unreg(p_event); + mutex_unlock(&event_mutex); +} + int perf_trace_add(struct perf_event *p_event, int flags) { struct ftrace_event_call *tp_event = p_event->tp_event; @@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags) list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); - return 0; + return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); } void perf_trace_del(struct perf_event *p_event, int flags) { - hlist_del_rcu(&p_event->hlist_entry); -} - -void perf_trace_destroy(struct perf_event *p_event) -{ struct ftrace_event_call *tp_event = p_event->tp_event; - int i; - - mutex_lock(&event_mutex); - if (--tp_event->perf_refcount > 0) - goto out; - - tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); - - /* - * Ensure our callback won't be called anymore. The buffers - * will be freed after that. - */ - tracepoint_synchronize_unregister(); - - free_percpu(tp_event->perf_events); - tp_event->perf_events = NULL; - - if (!--total_ref_count) { - for (i = 0; i < PERF_NR_CONTEXTS; i++) { - free_percpu(perf_trace_buf[i]); - perf_trace_buf[i] = NULL; - } - } -out: - module_put(tp_event->mod); - mutex_unlock(&event_mutex); + hlist_del_rcu(&p_event->hlist_entry); + tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, @@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); + +#ifdef CONFIG_FUNCTION_TRACER +static void +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +{ + struct ftrace_entry *entry; + struct hlist_head *head; + struct pt_regs regs; + int rctx; + +#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ + sizeof(u64)) - sizeof(u32)) + + BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); + + perf_fetch_caller_regs(®s); + + entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); + if (!entry) + return; + + entry->ip = ip; + entry->parent_ip = parent_ip; + + head = this_cpu_ptr(event_function.perf_events); + perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, + 1, ®s, head); + +#undef ENTRY_SIZE +} + +static int perf_ftrace_function_register(struct perf_event *event) +{ + struct ftrace_ops *ops = &event->ftrace_ops; + + ops->flags |= FTRACE_OPS_FL_CONTROL; + ops->func = perf_ftrace_function_call; + return register_ftrace_function(ops); +} + +static int perf_ftrace_function_unregister(struct perf_event *event) +{ + struct ftrace_ops *ops = &event->ftrace_ops; + int ret = unregister_ftrace_function(ops); + ftrace_free_filter(ops); + return ret; +} + +static void perf_ftrace_function_enable(struct perf_event *event) +{ + ftrace_function_local_enable(&event->ftrace_ops); +} + +static void perf_ftrace_function_disable(struct perf_event *event) +{ + ftrace_function_local_disable(&event->ftrace_ops); +} + +int perf_ftrace_event_register(struct ftrace_event_call *call, + enum trace_reg type, void *data) +{ + switch (type) { + case TRACE_REG_REGISTER: + case TRACE_REG_UNREGISTER: + break; + case TRACE_REG_PERF_REGISTER: + case TRACE_REG_PERF_UNREGISTER: + return 0; + case TRACE_REG_PERF_OPEN: + return perf_ftrace_function_register(data); + case TRACE_REG_PERF_CLOSE: + return perf_ftrace_function_unregister(data); + case TRACE_REG_PERF_ADD: + perf_ftrace_function_enable(data); + return 0; + case TRACE_REG_PERF_DEL: + perf_ftrace_function_disable(data); + return 0; + } + + return -EINVAL; +} +#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c212a7f934e..079a93ae8a9 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); -int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) +int ftrace_event_reg(struct ftrace_event_call *call, + enum trace_reg type, void *data) { switch (type) { case TRACE_REG_REGISTER: @@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) call->class->perf_probe, call); return 0; + case TRACE_REG_PERF_OPEN: + case TRACE_REG_PERF_CLOSE: + case TRACE_REG_PERF_ADD: + case TRACE_REG_PERF_DEL: + return 0; #endif } return 0; @@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, tracing_stop_cmdline_record(); call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; } - call->class->reg(call, TRACE_REG_UNREGISTER); + call->class->reg(call, TRACE_REG_UNREGISTER, NULL); } break; case 1: @@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, tracing_start_cmdline_record(); call->flags |= TRACE_EVENT_FL_RECORDED_CMD; } - ret = call->class->reg(call, TRACE_REG_REGISTER); + ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not enable event " diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f04cc3136bd..431dba8b754 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -81,6 +81,7 @@ enum { FILT_ERR_TOO_MANY_PREDS, FILT_ERR_MISSING_FIELD, FILT_ERR_INVALID_FILTER, + FILT_ERR_IP_FIELD_ONLY, }; static char *err_text[] = { @@ -96,6 +97,7 @@ static char *err_text[] = { "Too many terms in predicate expression", "Missing field name and/or value", "Meaningless filter expression", + "Only 'ip' field is supported for function trace", }; struct opstack_op { @@ -685,7 +687,7 @@ find_event_field(struct ftrace_event_call *call, char *name) static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) { - stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); + stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); if (!stack->preds) return -ENOMEM; stack->index = n_preds; @@ -826,8 +828,7 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) if (filter->preds) __free_preds(filter); - filter->preds = - kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); + filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL); if (!filter->preds) return -ENOMEM; @@ -900,6 +901,11 @@ int filter_assign_type(const char *type) return FILTER_OTHER; } +static bool is_function_field(struct ftrace_event_field *field) +{ + return field->filter_type == FILTER_TRACE_FN; +} + static bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || @@ -987,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps, fn = filter_pred_strloc; else fn = filter_pred_pchar; + } else if (is_function_field(field)) { + if (strcmp(field->name, "ip")) { + parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0); + return -EINVAL; + } } else { if (field->is_signed) ret = strict_strtoll(pred->regex.pattern, 0, &val); @@ -1334,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps, strcpy(pred.regex.pattern, operand2); pred.regex.len = strlen(pred.regex.pattern); - -#ifdef CONFIG_FTRACE_STARTUP_TEST pred.field = field; -#endif return init_pred(ps, field, &pred) ? NULL : &pred; } @@ -1486,7 +1494,7 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root) children = count_leafs(preds, &preds[root->left]); children += count_leafs(preds, &preds[root->right]); - root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL); + root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL); if (!root->ops) return -ENOMEM; @@ -1738,11 +1746,121 @@ static int replace_system_preds(struct event_subsystem *system, return -ENOMEM; } +static int create_filter_start(char *filter_str, bool set_str, + struct filter_parse_state **psp, + struct event_filter **filterp) +{ + struct event_filter *filter; + struct filter_parse_state *ps = NULL; + int err = 0; + + WARN_ON_ONCE(*psp || *filterp); + + /* allocate everything, and if any fails, free all and fail */ + filter = __alloc_filter(); + if (filter && set_str) + err = replace_filter_string(filter, filter_str); + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + + if (!filter || !ps || err) { + kfree(ps); + __free_filter(filter); + return -ENOMEM; + } + + /* we're committed to creating a new filter */ + *filterp = filter; + *psp = ps; + + parse_init(ps, filter_ops, filter_str); + err = filter_parse(ps); + if (err && set_str) + append_filter_err(ps, filter); + return err; +} + +static void create_filter_finish(struct filter_parse_state *ps) +{ + if (ps) { + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + } +} + +/** + * create_filter - create a filter for a ftrace_event_call + * @call: ftrace_event_call to create a filter for + * @filter_str: filter string + * @set_str: remember @filter_str and enable detailed error in filter + * @filterp: out param for created filter (always updated on return) + * + * Creates a filter for @call with @filter_str. If @set_str is %true, + * @filter_str is copied and recorded in the new filter. + * + * On success, returns 0 and *@filterp points to the new filter. On + * failure, returns -errno and *@filterp may point to %NULL or to a new + * filter. In the latter case, the returned filter contains error + * information if @set_str is %true and the caller is responsible for + * freeing it. + */ +static int create_filter(struct ftrace_event_call *call, + char *filter_str, bool set_str, + struct event_filter **filterp) +{ + struct event_filter *filter = NULL; + struct filter_parse_state *ps = NULL; + int err; + + err = create_filter_start(filter_str, set_str, &ps, &filter); + if (!err) { + err = replace_preds(call, filter, ps, filter_str, false); + if (err && set_str) + append_filter_err(ps, filter); + } + create_filter_finish(ps); + + *filterp = filter; + return err; +} + +/** + * create_system_filter - create a filter for an event_subsystem + * @system: event_subsystem to create a filter for + * @filter_str: filter string + * @filterp: out param for created filter (always updated on return) + * + * Identical to create_filter() except that it creates a subsystem filter + * and always remembers @filter_str. + */ +static int create_system_filter(struct event_subsystem *system, + char *filter_str, struct event_filter **filterp) +{ + struct event_filter *filter = NULL; + struct filter_parse_state *ps = NULL; + int err; + + err = create_filter_start(filter_str, true, &ps, &filter); + if (!err) { + err = replace_system_preds(system, ps, filter_str); + if (!err) { + /* System filters just show a default message */ + kfree(filter->filter_string); + filter->filter_string = NULL; + } else { + append_filter_err(ps, filter); + } + } + create_filter_finish(ps); + + *filterp = filter; + return err; +} + int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { - struct filter_parse_state *ps; struct event_filter *filter; - struct event_filter *tmp; int err = 0; mutex_lock(&event_mutex); @@ -1759,49 +1877,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out_unlock; } - err = -ENOMEM; - ps = kzalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - goto out_unlock; + err = create_filter(call, filter_string, true, &filter); - filter = __alloc_filter(); - if (!filter) { - kfree(ps); - goto out_unlock; - } - - replace_filter_string(filter, filter_string); - - parse_init(ps, filter_ops, filter_string); - err = filter_parse(ps); - if (err) { - append_filter_err(ps, filter); - goto out; - } - - err = replace_preds(call, filter, ps, filter_string, false); - if (err) { - filter_disable(call); - append_filter_err(ps, filter); - } else - call->flags |= TRACE_EVENT_FL_FILTERED; -out: /* * Always swap the call filter with the new filter * even if there was an error. If there was an error * in the filter, we disable the filter and show the error * string */ - tmp = call->filter; - rcu_assign_pointer(call->filter, filter); - if (tmp) { - /* Make sure the call is done with the filter */ - synchronize_sched(); - __free_filter(tmp); + if (filter) { + struct event_filter *tmp = call->filter; + + if (!err) + call->flags |= TRACE_EVENT_FL_FILTERED; + else + filter_disable(call); + + rcu_assign_pointer(call->filter, filter); + + if (tmp) { + /* Make sure the call is done with the filter */ + synchronize_sched(); + __free_filter(tmp); + } } - filter_opstack_clear(ps); - postfix_clear(ps); - kfree(ps); out_unlock: mutex_unlock(&event_mutex); @@ -1811,7 +1910,6 @@ out_unlock: int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { - struct filter_parse_state *ps; struct event_filter *filter; int err = 0; @@ -1835,66 +1933,178 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out_unlock; } - err = -ENOMEM; - ps = kzalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - goto out_unlock; + err = create_system_filter(system, filter_string, &filter); + if (filter) { + /* + * No event actually uses the system filter + * we can free it without synchronize_sched(). + */ + __free_filter(system->filter); + system->filter = filter; + } +out_unlock: + mutex_unlock(&event_mutex); - filter = __alloc_filter(); - if (!filter) - goto out; + return err; +} - /* System filters just show a default message */ - kfree(filter->filter_string); - filter->filter_string = NULL; +#ifdef CONFIG_PERF_EVENTS + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_filter(filter); +} + +struct function_filter_data { + struct ftrace_ops *ops; + int first_filter; + int first_notrace; +}; + +#ifdef CONFIG_FUNCTION_TRACER +static char ** +ftrace_function_filter_re(char *buf, int len, int *count) +{ + char *str, *sep, **re; + + str = kstrndup(buf, len, GFP_KERNEL); + if (!str) + return NULL; /* - * No event actually uses the system filter - * we can free it without synchronize_sched(). + * The argv_split function takes white space + * as a separator, so convert ',' into spaces. */ - __free_filter(system->filter); - system->filter = filter; + while ((sep = strchr(str, ','))) + *sep = ' '; - parse_init(ps, filter_ops, filter_string); - err = filter_parse(ps); - if (err) - goto err_filter; + re = argv_split(GFP_KERNEL, str, count); + kfree(str); + return re; +} - err = replace_system_preds(system, ps, filter_string); - if (err) - goto err_filter; +static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, + int reset, char *re, int len) +{ + int ret; -out: - filter_opstack_clear(ps); - postfix_clear(ps); - kfree(ps); -out_unlock: - mutex_unlock(&event_mutex); + if (filter) + ret = ftrace_set_filter(ops, re, len, reset); + else + ret = ftrace_set_notrace(ops, re, len, reset); - return err; + return ret; +} -err_filter: - replace_filter_string(filter, filter_string); - append_filter_err(ps, system->filter); - goto out; +static int __ftrace_function_set_filter(int filter, char *buf, int len, + struct function_filter_data *data) +{ + int i, re_cnt, ret; + int *reset; + char **re; + + reset = filter ? &data->first_filter : &data->first_notrace; + + /* + * The 'ip' field could have multiple filters set, separated + * either by space or comma. We first cut the filter and apply + * all pieces separatelly. + */ + re = ftrace_function_filter_re(buf, len, &re_cnt); + if (!re) + return -EINVAL; + + for (i = 0; i < re_cnt; i++) { + ret = ftrace_function_set_regexp(data->ops, filter, *reset, + re[i], strlen(re[i])); + if (ret) + break; + + if (*reset) + *reset = 0; + } + + argv_free(re); + return ret; } -#ifdef CONFIG_PERF_EVENTS +static int ftrace_function_check_pred(struct filter_pred *pred, int leaf) +{ + struct ftrace_event_field *field = pred->field; -void ftrace_profile_free_filter(struct perf_event *event) + if (leaf) { + /* + * Check the leaf predicate for function trace, verify: + * - only '==' and '!=' is used + * - the 'ip' field is used + */ + if ((pred->op != OP_EQ) && (pred->op != OP_NE)) + return -EINVAL; + + if (strcmp(field->name, "ip")) + return -EINVAL; + } else { + /* + * Check the non leaf predicate for function trace, verify: + * - only '||' is used + */ + if (pred->op != OP_OR) + return -EINVAL; + } + + return 0; +} + +static int ftrace_function_set_filter_cb(enum move_type move, + struct filter_pred *pred, + int *err, void *data) { - struct event_filter *filter = event->filter; + /* Checking the node is valid for function trace. */ + if ((move != MOVE_DOWN) || + (pred->left != FILTER_PRED_INVALID)) { + *err = ftrace_function_check_pred(pred, 0); + } else { + *err = ftrace_function_check_pred(pred, 1); + if (*err) + return WALK_PRED_ABORT; + + *err = __ftrace_function_set_filter(pred->op == OP_EQ, + pred->regex.pattern, + pred->regex.len, + data); + } - event->filter = NULL; - __free_filter(filter); + return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT; } +static int ftrace_function_set_filter(struct perf_event *event, + struct event_filter *filter) +{ + struct function_filter_data data = { + .first_filter = 1, + .first_notrace = 1, + .ops = &event->ftrace_ops, + }; + + return walk_pred_tree(filter->preds, filter->root, + ftrace_function_set_filter_cb, &data); +} +#else +static int ftrace_function_set_filter(struct perf_event *event, + struct event_filter *filter) +{ + return -ENODEV; +} +#endif /* CONFIG_FUNCTION_TRACER */ + int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str) { int err; struct event_filter *filter; - struct filter_parse_state *ps; struct ftrace_event_call *call; mutex_lock(&event_mutex); @@ -1909,33 +2119,17 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (event->filter) goto out_unlock; - filter = __alloc_filter(); - if (!filter) { - err = PTR_ERR(filter); - goto out_unlock; - } - - err = -ENOMEM; - ps = kzalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - goto free_filter; - - parse_init(ps, filter_ops, filter_str); - err = filter_parse(ps); + err = create_filter(call, filter_str, false, &filter); if (err) - goto free_ps; + goto free_filter; - err = replace_preds(call, filter, ps, filter_str, false); - if (!err) + if (ftrace_event_is_function(call)) + err = ftrace_function_set_filter(event, filter); + else event->filter = filter; -free_ps: - filter_opstack_clear(ps); - postfix_clear(ps); - kfree(ps); - free_filter: - if (err) + if (err || ftrace_event_is_function(call)) __free_filter(filter); out_unlock: @@ -1954,43 +2148,6 @@ out_unlock: #define CREATE_TRACE_POINTS #include "trace_events_filter_test.h" -static int test_get_filter(char *filter_str, struct ftrace_event_call *call, - struct event_filter **pfilter) -{ - struct event_filter *filter; - struct filter_parse_state *ps; - int err = -ENOMEM; - - filter = __alloc_filter(); - if (!filter) - goto out; - - ps = kzalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - goto free_filter; - - parse_init(ps, filter_ops, filter_str); - err = filter_parse(ps); - if (err) - goto free_ps; - - err = replace_preds(call, filter, ps, filter_str, false); - if (!err) - *pfilter = filter; - - free_ps: - filter_opstack_clear(ps); - postfix_clear(ps); - kfree(ps); - - free_filter: - if (err) - __free_filter(filter); - - out: - return err; -} - #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ { \ .filter = FILTER, \ @@ -2109,12 +2266,13 @@ static __init int ftrace_test_event_filter(void) struct test_filter_data_t *d = &test_filter_data[i]; int err; - err = test_get_filter(d->filter, &event_ftrace_test_filter, - &filter); + err = create_filter(&event_ftrace_test_filter, d->filter, + false, &filter); if (err) { printk(KERN_INFO "Failed to get filter for '%s', err %d\n", d->filter, err); + __free_filter(filter); break; } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index bbeec31e0ae..7b46c9bd22a 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -18,6 +18,16 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM ftrace +/* + * The FTRACE_ENTRY_REG macro allows ftrace entry to define register + * function and thus become accesible via perf. + */ +#undef FTRACE_ENTRY_REG +#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ + filter, regfn) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ + filter) + /* not needed for this file */ #undef __field_struct #define __field_struct(type, item) @@ -44,21 +54,22 @@ #define F_printk(fmt, args...) fmt, args #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -struct ____ftrace_##name { \ - tstruct \ -}; \ -static void __always_unused ____ftrace_check_##name(void) \ -{ \ - struct ____ftrace_##name *__entry = NULL; \ - \ - /* force compile-time check on F_printk() */ \ - printk(print); \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ +struct ____ftrace_##name { \ + tstruct \ +}; \ +static void __always_unused ____ftrace_check_##name(void) \ +{ \ + struct ____ftrace_##name *__entry = NULL; \ + \ + /* force compile-time check on F_printk() */ \ + printk(print); \ } #undef FTRACE_ENTRY_DUP -#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ - FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) +#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ + FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ + filter) #include "trace_entries.h" @@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; @@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void) \ offsetof(typeof(field), \ container.item), \ sizeof(field.container.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; @@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void) \ ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), filter_type); \ mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ @@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void) \ offsetof(typeof(field), \ container.item), \ sizeof(field.container.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; @@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __dynamic_array(type, item) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - 0, is_signed_type(type), FILTER_OTHER);\ + 0, is_signed_type(type), filter_type);\ if (ret) \ return ret; #undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ +#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ int \ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ { \ struct struct_name field; \ int ret; \ + int filter_type = filter; \ \ tstruct; \ \ @@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #undef F_printk #define F_printk(fmt, args...) #fmt ", " __stringify(args) -#undef FTRACE_ENTRY -#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ +#undef FTRACE_ENTRY_REG +#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ + regfn) \ \ struct ftrace_event_class event_class_ftrace_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .define_fields = ftrace_define_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ + .reg = regfn, \ }; \ \ struct ftrace_event_call __used event_##call = { \ @@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = { \ struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; +#undef FTRACE_ENTRY +#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ + FTRACE_ENTRY_REG(call, struct_name, etype, \ + PARAMS(tstruct), PARAMS(print), filter, NULL) + +int ftrace_event_is_function(struct ftrace_event_call *call) +{ + return call == &event_function; +} + #include "trace_entries.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 00d527c945a..580a05ec926 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, #endif /* CONFIG_PERF_EVENTS */ static __kprobes -int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) +int kprobe_register(struct ftrace_event_call *event, + enum trace_reg type, void *data) { struct trace_probe *tp = (struct trace_probe *)event->data; @@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) case TRACE_REG_PERF_UNREGISTER: disable_trace_probe(tp, TP_FLAG_PROFILE); return 0; + case TRACE_REG_PERF_OPEN: + case TRACE_REG_PERF_CLOSE: + case TRACE_REG_PERF_ADD: + case TRACE_REG_PERF_DEL: + return 0; #endif } return 0; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0d6ff355594..859fae6b182 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -264,7 +264,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len) return ret; } -int trace_seq_path(struct trace_seq *s, struct path *path) +int trace_seq_path(struct trace_seq *s, const struct path *path) { unsigned char *p; @@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long mask; const char *str; const char *ret = p->buffer + p->len; - int i; + int i, first = 1; for (i = 0; flag_array[i].name && flags; i++) { @@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, str = flag_array[i].name; flags &= ~mask; - if (p->len && delim) + if (!first && delim) trace_seq_puts(p, delim); + else + first = 0; trace_seq_puts(p, str); } /* check for left over flags */ if (flags) { - if (p->len && delim) + if (!first && delim) trace_seq_puts(p, delim); trace_seq_printf(p, "0x%lx", flags); } @@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, break; } - if (!p->len) + if (ret == (const char *)(p->buffer + p->len)) trace_seq_printf(p, "0x%lx", val); trace_seq_putc(p, 0); @@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, break; } - if (!p->len) + if (ret == (const char *)(p->buffer + p->len)) trace_seq_printf(p, "0x%llx", val); trace_seq_putc(p, 0); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 77575b386d9..d4545f49242 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -13,6 +13,9 @@ #include <linux/sysctl.h> #include <linux/init.h> #include <linux/fs.h> + +#include <asm/setup.h> + #include "trace.h" #define STACK_TRACE_ENTRIES 500 @@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, }; static ssize_t @@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = { .release = seq_release, }; +static int +stack_trace_filter_open(struct inode *inode, struct file *file) +{ + return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER, + inode, file); +} + +static const struct file_operations stack_trace_filter_fops = { + .open = stack_trace_filter_open, + .read = seq_read, + .write = ftrace_filter_write, + .llseek = ftrace_regex_lseek, + .release = ftrace_regex_release, +}; + int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write, return ret; } +static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata; + static __init int enable_stacktrace(char *str) { + if (strncmp(str, "_filter=", 8) == 0) + strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE); + stack_tracer_enabled = 1; last_stack_tracer_enabled = 1; return 1; @@ -358,6 +380,12 @@ static __init int stack_trace_init(void) trace_create_file("stack_trace", 0444, d_tracer, NULL, &stack_trace_fops); + trace_create_file("stack_trace_filter", 0444, d_tracer, + NULL, &stack_trace_filter_fops); + + if (stack_trace_filter_buf[0]) + ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1); + if (stack_tracer_enabled) register_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cb654542c1a..96fc7336909 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); static int syscall_enter_register(struct ftrace_event_call *event, - enum trace_reg type); + enum trace_reg type, void *data); static int syscall_exit_register(struct ftrace_event_call *event, - enum trace_reg type); + enum trace_reg type, void *data); static int syscall_enter_define_fields(struct ftrace_event_call *call); static int syscall_exit_define_fields(struct ftrace_event_call *call); @@ -468,8 +468,8 @@ int __init init_ftrace_syscalls(void) unsigned long addr; int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); + syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata), + GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); return -ENOMEM; @@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call) #endif /* CONFIG_PERF_EVENTS */ static int syscall_enter_register(struct ftrace_event_call *event, - enum trace_reg type) + enum trace_reg type, void *data) { switch (type) { case TRACE_REG_REGISTER: @@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event, case TRACE_REG_PERF_UNREGISTER: perf_sysenter_disable(event); return 0; + case TRACE_REG_PERF_OPEN: + case TRACE_REG_PERF_CLOSE: + case TRACE_REG_PERF_ADD: + case TRACE_REG_PERF_DEL: + return 0; #endif } return 0; } static int syscall_exit_register(struct ftrace_event_call *event, - enum trace_reg type) + enum trace_reg type, void *data) { switch (type) { case TRACE_REG_REGISTER: @@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event, case TRACE_REG_PERF_UNREGISTER: perf_sysexit_disable(event); return 0; + case TRACE_REG_PERF_OPEN: + case TRACE_REG_PERF_CLOSE: + case TRACE_REG_PERF_ADD: + case TRACE_REG_PERF_DEL: + return 0; #endif } return 0; |