diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 8 | ||||
-rw-r--r-- | arch/arm/include/asm/ftrace.h | 5 | ||||
-rw-r--r-- | arch/powerpc/include/asm/ftrace.h | 5 | ||||
-rw-r--r-- | arch/sh/include/asm/ftrace.h | 5 | ||||
-rw-r--r-- | arch/sparc/include/asm/ftrace.h | 5 | ||||
-rw-r--r-- | arch/x86/Kconfig.debug | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/ftrace.h | 15 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 129 | ||||
-rw-r--r-- | arch/x86/mm/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | include/linux/ftrace.h | 5 | ||||
-rw-r--r-- | include/linux/hardirq.h | 15 | ||||
-rw-r--r-- | include/linux/marker.h | 2 | ||||
-rw-r--r-- | include/linux/tracepoint.h | 4 | ||||
-rw-r--r-- | kernel/marker.c | 80 | ||||
-rw-r--r-- | kernel/sysctl.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace.c | 117 | ||||
-rw-r--r-- | kernel/tracepoint.c | 261 | ||||
-rw-r--r-- | scripts/Makefile.build | 12 | ||||
-rw-r--r-- | scripts/tracing/draw_functrace.py | 130 |
20 files changed, 612 insertions, 205 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b..4862284d311 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + ftrace=[tracer] + [ftrace] will set and start the specified tracer + as early as possible in order to facilitate early + boot debugging. + + ftrace_dump_on_oops + [ftrace] will dump the trace buffers on oops. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 39c8bc1a006..3f3a1d1508e 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE +#ifndef __ASSEMBLY__ +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b298f7a631e..1cd72700fbc 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_POWERPC_FTRACE #define _ASM_POWERPC_FTRACE +#ifndef __ASSEMBLY__ +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 3aed362c946..31ada0370cb 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -2,6 +2,11 @@ #define __ASM_SH_FTRACE_H #ifndef __ASSEMBLY__ +static 
inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + +#ifndef __ASSEMBLY__ extern void mcount(void); #endif diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index d27716cd38c..62055ac0496 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_SPARC64_FTRACE #define _ASM_SPARC64_FTRACE +#ifndef __ASSEMBLY__ +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + #ifdef CONFIG_MCOUNT #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e67..fa013f529b7 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,14 +186,10 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE_HOOKS - bool - config MMIOTRACE bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && PCI select TRACING - select MMIOTRACE_HOOKS help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. 
It is called from the ioremap diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b1..a23468194b8 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif +#endif /* __ASSEMBLY__ */ + +#else /* CONFIG_FUNCTION_TRACER */ + +#ifndef __ASSEMBLY__ +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9b..69149337f2f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -56,6 +56,133 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } +/* + * Modifying code must take extra care. On an SMP machine, if + * the code being modified is also being executed on another CPU + * that CPU will have undefined results and possibly take a GPF. + * We use kstop_machine to stop other CPUs from executing code. + * But this does not stop NMIs from happening. We still need + * to protect against that. We separate out the modification of + * the code to take care of this. + * + * Two buffers are added: An IP buffer and a "code" buffer. + * + * 1) Put the instruction pointer into the IP buffer + * and the new code into the "code" buffer. + * 2) Set a flag that says we are modifying code + * 3) Wait for any running NMIs to finish. + * 4) Write the code + * 5) clear the flag. + * 6) Wait for any running NMIs to finish. + * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". 
This will check if the flag is set to write + * and if it is, it will write what is in the IP and "code" buffers. + * + * The trick is, it does not matter if everyone is writing the same + * content to the code location. Also, if a CPU is executing code + * it is OK to write to that code location if the contents being written + * are the same as what exists. + */ + +static atomic_t in_nmi = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static int mod_code_write; /* set when NMI should do the write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ + +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); + +int ftrace_arch_read_dyn_info(char *buf, int size) +{ + int r; + + r = snprintf(buf, size, "%u %u", + nmi_wait_count, + atomic_read(&nmi_update_count)); + return r; +} + +static void ftrace_mod_code(void) +{ + /* + * Yes, more than one CPU process can be writing to mod_code_status. + * (and the code itself) + * But if one were to fail, then they all should, and if one were + * to succeed, then they all should. 
+ */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); + +} + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); + /* Must have in_nmi seen before reading write flag */ + smp_mb(); + if (mod_code_write) { + ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } +} + +void ftrace_nmi_exit(void) +{ + /* Finish all executions before clearing in_nmi */ + smp_wmb(); + atomic_dec(&in_nmi); +} + +static void wait_for_nmi(void) +{ + int waited = 0; + + while (atomic_read(&in_nmi)) { + waited = 1; + cpu_relax(); + } + + if (waited) + nmi_wait_count++; +} + +static int +do_ftrace_mod_code(unsigned long ip, void *new_code) +{ + mod_code_ip = (void *)ip; + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ + smp_wmb(); + + mod_code_write = 1; + + /* Make sure write bit is visible before we wait on NMIs */ + smp_mb(); + + wait_for_nmi(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); + + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ + smp_wmb(); + + mod_code_write = 0; + + /* make sure NMIs see the cleared bit */ + smp_mb(); + + wait_for_nmi(); + + return mod_code_status; +} + + int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) @@ -81,7 +208,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + if (do_ftrace_mod_code(ip, new_code)) return -EPERM; sync_core(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 59f89b434b4..0a21b7aab9d 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o +mmiotrace-y := 
kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa_$(BITS).o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 31e8730fa24..4152d3c3b13 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -53,7 +53,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE_HOOKS +#ifdef CONFIG_MMIOTRACE if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2..e46a7b34037 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment @@ -181,6 +184,8 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING +extern int ftrace_dump_on_oops; + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a..0087cb43bec 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -5,6 +5,7 @@ #include <linux/smp_lock.h> #include <linux/lockdep.h> #include <asm/hardirq.h> +#include <asm/ftrace.h> #include <asm/system.h> /* @@ -161,7 +162,17 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + lockdep_off(); \ + __irq_enter(); \ + } while (0) +#define nmi_exit() \ + do { \ + __irq_exit(); \ + lockdep_on(); \ + ftrace_nmi_exit(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/marker.h b/include/linux/marker.h index 
889196c7fbb..4cf45472d9f 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...); /* * Connect a probe to a marker. diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c5bb39c7a77..63064e9403f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; diff --git a/kernel/marker.c b/kernel/marker.c index e9c6b2bc940..2898b647d41 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex); */ #define MARKER_HASH_BITS 6 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) +static struct hlist_head marker_table[MARKER_TABLE_SIZE]; /* * Note about RCU : @@ -64,11 +65,10 @@ struct marker_entry { void *oldptr; int rcu_pending; unsigned char ptype:1; + unsigned char format_allocated:1; char name[0]; /* Contains name'\0'format'\0' */ }; -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - /** * __mark_empty_function - Empty probe callback * @probe_private: probe private data @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * * Should be connected to markers "MARK_NOARGS". */ -void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) +static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) 
{ va_list args; /* not initialized */ char ptype; @@ -197,7 +197,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) } rcu_read_unlock_sched(); } -EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); static void free_old_closure(struct rcu_head *head) { @@ -416,6 +415,7 @@ static struct marker_entry *add_marker(const char *name, const char *format) e->single.probe_private = NULL; e->multi = NULL; e->ptype = 0; + e->format_allocated = 0; e->refcount = 0; e->rcu_pending = 0; hlist_add_head(&e->hlist, head); @@ -447,6 +447,8 @@ static int remove_marker(const char *name) if (e->single.func != __mark_empty_function) return -EBUSY; hlist_del(&e->hlist); + if (e->format_allocated) + kfree(e->format); /* Make sure the call_rcu has been executed */ if (e->rcu_pending) rcu_barrier_sched(); @@ -457,57 +459,34 @@ static int remove_marker(const char *name) /* * Set the mark_entry format to the format found in the element. */ -static int marker_set_format(struct marker_entry **entry, const char *format) +static int marker_set_format(struct marker_entry *entry, const char *format) { - struct marker_entry *e; - size_t name_len = strlen((*entry)->name) + 1; - size_t format_len = strlen(format) + 1; - - - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) + entry->format = kstrdup(format, GFP_KERNEL); + if (!entry->format) return -ENOMEM; - memcpy(&e->name[0], (*entry)->name, name_len); - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - e->single = (*entry)->single; - e->multi = (*entry)->multi; - e->ptype = (*entry)->ptype; - e->refcount = (*entry)->refcount; - e->rcu_pending = 0; - hlist_add_before(&e->hlist, &(*entry)->hlist); - hlist_del(&(*entry)->hlist); - /* Make sure the call_rcu has been executed */ - if ((*entry)->rcu_pending) - rcu_barrier_sched(); - kfree(*entry); - *entry 
= e; + entry->format_allocated = 1; + trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); + entry->name, entry->format); return 0; } /* * Sets the probe callback corresponding to one marker. */ -static int set_marker(struct marker_entry **entry, struct marker *elem, +static int set_marker(struct marker_entry *entry, struct marker *elem, int active) { int ret; - WARN_ON(strcmp((*entry)->name, elem->name) != 0); + WARN_ON(strcmp(entry->name, elem->name) != 0); - if ((*entry)->format) { - if (strcmp((*entry)->format, elem->format) != 0) { + if (entry->format) { + if (strcmp(entry->format, elem->format) != 0) { printk(KERN_NOTICE "Format mismatch for probe %s " "(%s), marker (%s)\n", - (*entry)->name, - (*entry)->format, + entry->name, + entry->format, elem->format); return -EPERM; } @@ -523,34 +502,33 @@ static int set_marker(struct marker_entry **entry, struct marker *elem, * pass from a "safe" callback (with argument) to an "unsafe" * callback (does not set arguments). */ - elem->call = (*entry)->call; + elem->call = entry->call; /* * Sanity check : * We only update the single probe private data when the ptr is * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) */ WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private - != (*entry)->single.probe_private && - !elem->ptype); - elem->single.probe_private = (*entry)->single.probe_private; + && elem->single.probe_private != entry->single.probe_private + && !elem->ptype); + elem->single.probe_private = entry->single.probe_private; /* * Make sure the private data is valid when we update the * single probe ptr. */ smp_wmb(); - elem->single.func = (*entry)->single.func; + elem->single.func = entry->single.func; /* * We also make sure that the new probe callbacks array is consistent * before setting a pointer to it. 
*/ - rcu_assign_pointer(elem->multi, (*entry)->multi); + rcu_assign_pointer(elem->multi, entry->multi); /* * Update the function or multi probe array pointer before setting the * ptype. */ smp_wmb(); - elem->ptype = (*entry)->ptype; + elem->ptype = entry->ptype; elem->state = active; return 0; @@ -594,8 +572,7 @@ void marker_update_probe_range(struct marker *begin, for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); if (mark_entry) { - set_marker(&mark_entry, iter, - !!mark_entry->refcount); + set_marker(mark_entry, iter, !!mark_entry->refcount); /* * ignore error, continue */ @@ -657,7 +634,7 @@ int marker_probe_register(const char *name, const char *format, ret = PTR_ERR(entry); } else if (format) { if (!entry->format) - ret = marker_set_format(&entry, format); + ret = marker_set_format(entry, format); else if (strcmp(entry->format, format)) ret = -EPERM; } @@ -848,8 +825,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, if (!e->ptype) { if (num == 0 && e->single.func == probe) return e->single.probe_private; - else - break; } else { struct marker_probe_closure *closure; int match = 0; @@ -861,6 +836,7 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe, return closure[i].probe_private; } } + break; } } return ERR_PTR(-ENOENT); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d048fa2d90..6b6b727258b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif +#ifdef CONFIG_TRACING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_dump_on_opps", + .data = &ftrace_dump_on_oops, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85bee775a03..e4c40c868d6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -64,6 +64,37 
@@ static cpumask_t __read_mostly tracing_buffer_mask; static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputting them to a + * serial console. + * + * It is off by default, but you can enable it by either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); + long ns2usecs(cycle_t nsec) { @@ -2374,29 +2405,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. 
*/ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2420,6 +2433,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; @@ -2822,22 +2862,38 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE +int __weak ftrace_arch_read_dyn_info(char *buf, int size) +{ + return 0; +} + static ssize_t -tracing_read_long(struct file *filp, char __user *ubuf, +tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; - char buf[64]; + char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; - r = sprintf(buf, "%ld\n", *p); + mutex_lock(&dyn_info_mutex); + r = sprintf(buf, "%ld ", *p); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); + buf[r++] = '\n'; + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + mutex_unlock(&dyn_info_mutex); + + return r; } -static struct file_operations tracing_read_long_fops = { +static struct file_operations tracing_dyn_info_fops = { .open = tracing_open_generic, - .read = tracing_read_long, + .read = tracing_read_dyn_info, }; #endif @@ -2946,7 +3002,7 @@ static __init int 
tracer_init_debugfs(void) #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, - &tracing_read_long_fops); + &tracing_dyn_info_fops); if (!entry) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); @@ -3027,7 +3083,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -3043,7 +3100,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -3084,7 +3142,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index af8c8566488..e96590f17de 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex); */ #define TRACEPOINT_HASH_BITS 6 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) +static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; /* * Note about RCU : @@ -54,40 +55,43 @@ struct tracepoint_entry { struct hlist_node hlist; void **funcs; int refcount; /* Number of times armed. 0 if disarmed. 
*/ - struct rcu_head rcu; - void *oldptr; - unsigned char rcu_pending:1; char name[0]; }; -static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; +struct tp_probes { + union { + struct rcu_head rcu; + struct list_head list; + } u; + void *probes[0]; +}; -static void free_old_closure(struct rcu_head *head) +static inline void *allocate_probes(int count) { - struct tracepoint_entry *entry = container_of(head, - struct tracepoint_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; + struct tp_probes *p = kmalloc(count * sizeof(void *) + + sizeof(struct tp_probes), GFP_KERNEL); + return p == NULL ? NULL : p->probes; } -static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) +static void rcu_free_old_probes(struct rcu_head *head) { - if (!old) - return; - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); + kfree(container_of(head, struct tp_probes, u.rcu)); +} + +static inline void release_probes(void *old) +{ + if (old) { + struct tp_probes *tp_probes = container_of(old, + struct tp_probes, probes[0]); + call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); + } } static void debug_print_probes(struct tracepoint_entry *entry) { int i; - if (!tracepoint_debug) + if (!tracepoint_debug || !entry->funcs) return; for (i = 0; entry->funcs[i]; i++) @@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) return ERR_PTR(-EEXIST); } /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); + new = allocate_probes(nr_probes + 2); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) memcpy(new, old, nr_probes * sizeof(void *)); new[nr_probes] = probe; + new[nr_probes + 1] = NULL; entry->refcount = nr_probes + 1; entry->funcs = new; 
debug_print_probes(entry); @@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) old = entry->funcs; if (!old) - return NULL; + return ERR_PTR(-ENOENT); debug_print_probes(entry); /* (N -> M), (N > 1, M >= 0) probes */ @@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) int j = 0; /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(void *), GFP_KERNEL); + new = allocate_probes(nr_probes - nr_del + 1); if (new == NULL) return ERR_PTR(-ENOMEM); for (i = 0; old[i]; i++) if ((probe && old[i] != probe)) new[j++] = old[i]; + new[nr_probes - nr_del] = NULL; entry->refcount = nr_probes - nr_del; entry->funcs = new; } @@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name) memcpy(&e->name[0], name, name_len); e->funcs = NULL; e->refcount = 0; - e->rcu_pending = 0; hlist_add_head(&e->hlist, head); return e; } @@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name) * Remove the tracepoint from the tracepoint hash table. Must be called with * mutex_lock held. 
*/ -static int remove_tracepoint(const char *name) +static inline void remove_tracepoint(struct tracepoint_entry *e) { - struct hlist_head *head; - struct hlist_node *node; - struct tracepoint_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->refcount) - return -EBUSY; hlist_del(&e->hlist); - /* Make sure the call_rcu_sched has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); kfree(e); - return 0; } /* @@ -320,6 +302,23 @@ static void tracepoint_update_probes(void) module_update_tracepoints(); } +static void *tracepoint_add_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) { + entry = add_tracepoint(name); + if (IS_ERR(entry)) + return entry; + } + old = tracepoint_entry_add_probe(entry, probe); + if (IS_ERR(old) && !entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_register - Connect a probe to a tracepoint * @name: tracepoint name @@ -330,44 +329,36 @@ static void tracepoint_update_probes(void) */ int tracepoint_probe_register(const char *name, void *probe) { - struct tracepoint_entry *entry; - int ret = 0; void *old; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) { - entry = add_tracepoint(name); - if (IS_ERR(entry)) { - ret = PTR_ERR(entry); - goto end; - } - } - /* - * If we detect that a call_rcu_sched is pending for this tracepoint, - * make sure it's executed now. 
- */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = tracepoint_entry_add_probe(entry, probe); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } + old = tracepoint_add_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ - mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - WARN_ON(!entry); - if (entry->rcu_pending) - rcu_barrier_sched(); - tracepoint_entry_free_old(entry, old); -end: - mutex_unlock(&tracepoints_mutex); - return ret; + release_probes(old); + return 0; } EXPORT_SYMBOL_GPL(tracepoint_probe_register); +static void *tracepoint_remove_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) + return ERR_PTR(-ENOENT); + old = tracepoint_entry_remove_probe(entry, probe); + if (IS_ERR(old)) + return old; + if (!entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @name: tracepoint name @@ -380,38 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register); */ int tracepoint_probe_unregister(const char *name, void *probe) { - struct tracepoint_entry *entry; void *old; - int ret = -ENOENT; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = tracepoint_entry_remove_probe(entry, probe); - if (!old) { - printk(KERN_WARNING "Warning: Trying to unregister a probe" - "that doesn't exist\n"); - goto end; - } + old = tracepoint_remove_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ + release_probes(old); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); + +static LIST_HEAD(old_probes); +static int need_update; + +static void tracepoint_add_old_probes(void *old) +{ + 
need_update = 1; + if (old) { + struct tp_probes *tp_probes = container_of(old, + struct tp_probes, probes[0]); + list_add(&tp_probes->u.list, &old_probes); + } +} + +/** + * tracepoint_probe_register_noupdate - register a probe but not connect + * @name: tracepoint name + * @probe: probe handler + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_register_noupdate(const char *name, void *probe) +{ + void *old; + mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - tracepoint_entry_free_old(entry, old); - remove_tracepoint(name); /* Ignore busy error message */ - ret = 0; -end: + old = tracepoint_add_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); mutex_unlock(&tracepoints_mutex); - return ret; + return 0; } -EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); + +/** + * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect + * @name: tracepoint name + * @probe: probe function pointer + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_unregister_noupdate(const char *name, void *probe) +{ + void *old; + + mutex_lock(&tracepoints_mutex); + old = tracepoint_remove_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); + mutex_unlock(&tracepoints_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); + +/** + * tracepoint_probe_update_all - update tracepoints + */ +void tracepoint_probe_update_all(void) +{ + LIST_HEAD(release_probes); + struct tp_probes *pos, *next; + + mutex_lock(&tracepoints_mutex); + if (!need_update) { + mutex_unlock(&tracepoints_mutex); + return; + } + if (!list_empty(&old_probes)) + list_replace_init(&old_probes, &release_probes); + 
need_update = 0; + mutex_unlock(&tracepoints_mutex); + + tracepoint_update_probes(); + list_for_each_entry_safe(pos, next, &release_probes, u.list) { + list_del(&pos->u.list); + call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); + } +} +EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); /** * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 468fbc9016c..7a176773af8 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -198,16 +198,10 @@ cmd_modversions = \ fi; endif -ifdef CONFIG_64BIT -arch_bits = 64 -else -arch_bits = 32 -endif - ifdef CONFIG_FTRACE_MCOUNT_RECORD -cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ - "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ - "$(NM)" "$(RM)" "$(MV)" "$(@)"; +cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ + "$(if $(CONFIG_64BIT),64,32)" \ + "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; endif define rule_cc_o_c diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py new file mode 100644 index 00000000000..902f9a99262 --- /dev/null +++ b/scripts/tracing/draw_functrace.py @@ -0,0 +1,130 @@ +#!/usr/bin/python + +""" +Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com> +Licensed under the terms of the GNU GPL License version 2 + +This script parses a trace provided by the function tracer in +kernel/trace/trace_functions.c +The resulting trace is processed into a tree to produce a more +human-readable view of the call stack by drawing a textual but hierarchical +tree of calls. Only the functions' names and the call time are provided. + +Usage: + Be sure that you have CONFIG_FUNCTION_TRACER + # mkdir /debug + # mount -t debugfs debugfs /debug + # echo function > /debug/tracing/current_tracer + $ cat /debug/tracing/trace_pipe > ~/raw_trace_func + Wait some time, but not too much: the script is a bit slow.
+ Break the pipe (Ctrl + Z) + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace + Then you have your drawn trace in draw_functrace +""" + + +import sys, re + +class CallTree: + """ This class provides a tree representation of the function + call stack. If a function has no parent in the kernel (interrupt, + syscall, kernel thread...) then it is attached to a virtual parent + called ROOT. + """ + ROOT = None + + def __init__(self, func, time = None, parent = None): + self._func = func + self._time = time + if parent is None: + self._parent = CallTree.ROOT + else: + self._parent = parent + self._children = [] + + def calls(self, func, calltime): + """ If a function calls another one, call this method to insert it + into the tree at the appropriate place. + @return: A reference to the newly created child node. + """ + child = CallTree(func, calltime, self) + self._children.append(child) + return child + + def getParent(self, func): + """ Retrieve the nearest node, starting from the current one and + walking up through its parents, whose name is func. If no such + node exists, create it as a new child of the root. + @return: A reference to the parent.
+ """ + tree = self + while tree != CallTree.ROOT and tree._func != func: + tree = tree._parent + if tree == CallTree.ROOT: + child = CallTree.ROOT.calls(func, None) + return child + return tree + + def __repr__(self): + return self.__toString("", True) + + def __toString(self, branch, lastChild): + if self._time is not None: + s = "%s----%s (%s)\n" % (branch, self._func, self._time) + else: + s = "%s----%s\n" % (branch, self._func) + + i = 0 + if lastChild: + branch = branch[:-1] + " " + while i < len(self._children): + if i != len(self._children) - 1: + s += "%s" % self._children[i].__toString(branch +\ + " |", False) + else: + s += "%s" % self._children[i].__toString(branch +\ + " |", True) + i += 1 + return s + +class BrokenLineException(Exception): + """If the last line is not complete because of the pipe breakage, + we want to stop the processing and ignore this line. + """ + pass + +class CommentLineException(Exception): + """ If the line is a comment (as in the beginning of the trace file), + just ignore it. + """ + pass + + +def parseLine(line): + line = line.strip() + if line.startswith("#"): + raise CommentLineException + m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + if m is None: + raise BrokenLineException + return (m.group(1), m.group(2), m.group(3)) + + +def main(): + CallTree.ROOT = CallTree("Root (Nowhere)", None, None) + tree = CallTree.ROOT + + for line in sys.stdin: + try: + calltime, callee, caller = parseLine(line) + except BrokenLineException: + break + except CommentLineException: + continue + tree = tree.getParent(caller) + tree = tree.calls(callee, calltime) + + print CallTree.ROOT + +if __name__ == "__main__": + main() |