From 6d07bb47354174a9b52d3b03f9e38b069a93d341 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: powerpc: ftrace, do not latency trace idle Impact: fix for irq off latency tracer When idle is called, interrupts are disabled, but the idle function will still wake up on an interrupt. The problem is that the interrupt disabled latency tracer will take this call to idle as a latency. This patch disables the latency tracing when going into idle. Signed-off-by: Steven Rostedt --- arch/powerpc/kernel/idle.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 31982d05d81..88d9c1d5e5f 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -69,10 +69,15 @@ void cpu_idle(void) smp_mb(); local_irq_disable(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); + /* check again after disabling irqs */ if (!need_resched() && !cpu_should_die()) ppc_md.power_save(); + start_critical_timings(); + local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); -- cgit v1.2.3-70-g09d2 From 8fd6e5a8c81e2e9b912ea33c8425a10729db469b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: powerpc: ftrace, convert to new dynamic ftrace arch API Impact: update to PowerPC ftrace arch API This patch converts PowerPC to use the new dynamic ftrace arch API. Thanks to Paul Mackerras for pointing out the mistakes of my original test_24bit_addr function. 
Signed-off-by: Steven Rostedt --- arch/powerpc/include/asm/ftrace.h | 14 +++++++- arch/powerpc/kernel/ftrace.c | 67 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b298f7a631e..17efecc2bf0 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -7,7 +7,19 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); -#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* reloction of mcount call site is the same as the address */ + return addr; +} + +struct dyn_arch_ftrace { + /* nothing yet */ +}; +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index f4b006ed0ab..24c023a5cae 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -33,12 +33,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -unsigned char *ftrace_nop_replace(void) +static unsigned char *ftrace_nop_replace(void) { return (char *)&ftrace_nop; } -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; @@ -68,7 +68,7 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) # define _ASM_PTR " .long " #endif -int +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -113,6 +113,62 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return faulted; } +static int test_24bit_addr(unsigned long ip, unsigned long addr) +{ + long diff; + + /* + * Can we get to addr from ip in 24 bits? 
+ * (26 really, since we mulitply by 4 for 4 byte alignment) + */ + diff = addr - ip; + + /* + * Return true if diff is less than 1 << 25 + * and greater than -1 << 26. + */ + return (diff < (1 << 25)) && (diff > (-1 << 26)); +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *old, *new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(rec->ip, addr)) { + /* within range */ + old = ftrace_call_replace(rec->ip, addr); + new = ftrace_nop_replace(); + return ftrace_modify_code(rec->ip, old, new); + } + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *old, *new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(rec->ip, addr)) { + /* within range */ + old = ftrace_nop_replace(); + new = ftrace_call_replace(rec->ip, addr); + return ftrace_modify_code(rec->ip, old, new); + } + + return 0; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -128,9 +184,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - /* This is running in kstop_machine */ + /* caller expects data to be zero */ + unsigned long *p = data; - ftrace_mcount_set(data); + *p = 0; return 0; } -- cgit v1.2.3-70-g09d2 From e4486fe316895e87672a563c4f36393218f84ff1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:20 -0800 Subject: powerpc: ftrace, use probe_kernel API to modify code Impact: use cleaner probe_kernel API over assembly Using probe_kernel_read/write interface is a much cleaner approach than the current assembly version. 
Signed-off-by: Steven Rostedt --- arch/powerpc/kernel/ftrace.c | 53 ++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 24c023a5cae..1adfbb268d8 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -72,45 +73,33 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned replaced; - unsigned old = *(unsigned *)old_code; - unsigned new = *(unsigned *)new_code; - int faulted = 0; + unsigned char replaced[MCOUNT_INSN_SIZE]; /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting - * as well as code changing. + * as well as code changing. We do this by using the + * probe_kernel_* functions. * * No real locking needed, this code is run through - * kstop_machine. + * kstop_machine, or before SMP starts. 
*/ - asm volatile ( - "1: lwz %1, 0(%2)\n" - " cmpw %1, %5\n" - " bne 2f\n" - " stwu %3, 0(%2)\n" - "2:\n" - ".section .fixup, \"ax\"\n" - "3: li %0, 1\n" - " b 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - _ASM_ALIGN "\n" - _ASM_PTR "1b, 3b\n" - ".previous" - : "=r"(faulted), "=r"(replaced) - : "r"(ip), "r"(new), - "0"(faulted), "r"(old) - : "memory"); - - if (replaced != old && replaced != new) - faulted = 2; - - if (!faulted) - flush_icache_range(ip, ip + 8); - - return faulted; + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + + /* replace the text with the new text */ + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + + flush_icache_range(ip, ip + 8); + + return 0; } static int test_24bit_addr(unsigned long ip, unsigned long addr) -- cgit v1.2.3-70-g09d2 From f48cb8b48b0b10025ca9c451b9b32cac3fcd33ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 20:47:03 -0800 Subject: powerpc/ppc64: ftrace, handle module trampolines for dyn ftrace Impact: Allow 64 bit PowerPC to trace modules with dynamic ftrace This adds code to handle the PPC64 module trampolines, and allows for PPC64 to use dynamic ftrace. Thanks to Paul Mackerras for these updates: - fix the mod and rec->arch.mod NULL checks. - fix to is_bl_op compare. Thanks to Milton Miller for: - finding the nasty race with using two nops, and recommending instead that I use a branch 8 forward. 
Signed-off-by: Steven Rostedt --- arch/powerpc/include/asm/ftrace.h | 2 +- arch/powerpc/include/asm/module.h | 11 ++ arch/powerpc/kernel/ftrace.c | 278 ++++++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/module_64.c | 13 ++ 4 files changed, 293 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 17efecc2bf0..e5f2ae8362f 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -16,7 +16,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } struct dyn_arch_ftrace { - /* nothing yet */ + struct module *mod; }; #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index e5f14b13ccf..340bc699b62 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -34,6 +34,11 @@ struct mod_arch_specific { #ifdef __powerpc64__ unsigned int stubs_section; /* Index of stubs section in module */ unsigned int toc_section; /* What section is the TOC? */ +#ifdef CONFIG_DYNAMIC_FTRACE + unsigned long toc; + unsigned long tramp; +#endif + #else /* Indices of PLT sections within module. */ unsigned int core_plt_section; @@ -68,6 +73,12 @@ struct mod_arch_specific { # endif /* MODULE */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE +# ifdef MODULE + asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); +# endif /* MODULE */ +#endif + struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1adfbb268d8..1aec559bdfc 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -10,22 +10,29 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) 
do { } while (0) +#endif -static unsigned int ftrace_nop = 0x60000000; +static unsigned int ftrace_nop = PPC_NOP_INSTR; #ifdef CONFIG_PPC32 # define GET_ADDR(addr) addr #else /* PowerPC64's functions are data that points to the functions */ -# define GET_ADDR(addr) *(unsigned long *)addr +# define GET_ADDR(addr) (*(unsigned long *)addr) #endif @@ -102,6 +109,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } +/* + * Helper functions that are the same for both PPC64 and PPC32. + */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { long diff; @@ -119,43 +129,292 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) return (diff < (1 << 25)) && (diff > (-1 << 26)); } +static int is_bl_op(unsigned int op) +{ + return (op & 0xfc000003) == 0x48000001; +} + +static int test_offset(unsigned long offset) +{ + return (offset + 0x2000000 > 0x3ffffff) || ((offset & 3) != 0); +} + +static unsigned long find_bl_target(unsigned long ip, unsigned int op) +{ + static int offset; + + offset = (op & 0x03fffffc); + /* make it signed */ + if (offset & 0x02000000) + offset |= 0xfe000000; + + return ip + (long)offset; +} + +static unsigned int branch_offset(unsigned long offset) +{ + /* return "bl ip+offset" */ + return 0x48000001 | (offset & 0x03fffffc); +} + +#ifdef CONFIG_PPC64 +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char replaced[MCOUNT_INSN_SIZE * 2]; + unsigned int *op = (unsigned *)&replaced; + unsigned char jmp[8]; + unsigned long *ptr = (unsigned long *)&jmp; + unsigned long ip = rec->ip; + unsigned long tramp; + int offset; + + /* read where this goes */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(*op)) { + printk(KERN_ERR "Not expected bl: opcode is %x\n", *op); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = 
find_bl_target(ip, *op); + + /* + * On PPC64 the trampoline looks like: + * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, + * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, + * Where the bytes 2,3,6 and 7 make up the 32bit offset + * to the TOC that holds the pointer. + * to jump to. + * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1) + * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12) + * The actually address is 32 bytes from the offset + * into the TOC. + * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) + */ + + DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); + + /* Find where the trampoline jumps to */ + if (probe_kernel_read(jmp, (void *)tramp, 8)) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x", + (unsigned)(*ptr >> 32), + (unsigned)*ptr); + + offset = (unsigned)jmp[2] << 24 | + (unsigned)jmp[3] << 16 | + (unsigned)jmp[6] << 8 | + (unsigned)jmp[7]; + + DEBUGP(" %x ", offset); + + /* get the address this jumps too */ + tramp = mod->arch.toc + offset + 32; + DEBUGP("toc: %lx", tramp); + + if (probe_kernel_read(jmp, (void *)tramp, 8)) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x\n", + (unsigned)(*ptr >> 32), + (unsigned)*ptr); + + /* This should match what was called */ + if (*ptr != GET_ADDR(addr)) { + printk(KERN_ERR "addr does not match %lx\n", *ptr); + return -EINVAL; + } + + /* + * We want to nop the line, but the next line is + * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1) + * This needs to be turned to a nop too. + */ + if (probe_kernel_read(replaced, (void *)(ip+4), MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (*op != 0xe8410028) { + printk(KERN_ERR "Next line is not ld! (%08x)\n", *op); + return -EINVAL; + } + + /* + * Milton Miller pointed out that we can not blindly do nops. + * If a task was preempted when calling a trace function, + * the nops will remove the way to restore the TOC in r2 + * and the r2 TOC will get corrupted. 
+ */ + + /* + * Replace: + * bl <==== will be replaced with "b 1f" + * ld r2,40(r1) + * 1: + */ + op[0] = 0x48000008; /* b +8 */ + + if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +#else /* !PPC64 */ +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + /* Ignore modules for PPC32 (for now) */ + return 0; +} +#endif /* PPC64 */ + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned char *old, *new; + unsigned long ip = rec->ip; /* * If the calling address is more that 24 bits away, * then we had to use a trampoline to make the call. * Otherwise just update the call site. */ - if (test_24bit_addr(rec->ip, addr)) { + if (test_24bit_addr(ip, addr)) { /* within range */ - old = ftrace_call_replace(rec->ip, addr); + old = ftrace_call_replace(ip, addr); new = ftrace_nop_replace(); - return ftrace_modify_code(rec->ip, old, new); + return ftrace_modify_code(ip, old, new); + } + +#ifdef CONFIG_PPC64 + /* + * Out of range jumps are called from modules. + * We should either already have a pointer to the module + * or it has been passed in. 
+ */ + if (!rec->arch.mod) { + if (!mod) { + printk(KERN_ERR "No module loaded addr=%lx\n", + addr); + return -EFAULT; + } + rec->arch.mod = mod; + } else if (mod) { + if (mod != rec->arch.mod) { + printk(KERN_ERR + "Record mod %p not equal to passed in mod %p\n", + rec->arch.mod, mod); + return -EINVAL; + } + /* nothing to do if mod == rec->arch.mod */ + } else + mod = rec->arch.mod; +#endif /* CONFIG_PPC64 */ + + return __ftrace_make_nop(mod, rec, addr); + +} + +#ifdef CONFIG_PPC64 +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char replaced[MCOUNT_INSN_SIZE * 2]; + unsigned int *op = (unsigned *)&replaced; + unsigned long ip = rec->ip; + unsigned long offset; + + /* read where this goes */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE * 2)) + return -EFAULT; + + /* + * It should be pointing to two nops or + * b +8; ld r2,40(r1) + */ + if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) && + ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) { + printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]); + return -EINVAL; + } + + /* If we never set up a trampoline to ftrace_caller, then bail */ + if (!rec->arch.mod->arch.tramp) { + printk(KERN_ERR "No ftrace trampoline\n"); + return -EINVAL; + } + + /* now calculate a jump to the ftrace caller trampoline */ + offset = rec->arch.mod->arch.tramp - ip; + + if (test_offset(offset)) { + printk(KERN_ERR "REL24 %li out of range!\n", + (long int)offset); + return -EINVAL; } + /* Set to "bl addr" */ + op[0] = branch_offset(offset); + /* ld r2,40(r1) */ + op[1] = 0xe8410028; + + DEBUGP("write to %lx\n", rec->ip); + + if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE * 2)) + return -EPERM; + return 0; } +#else +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + /* PPC32 ignores modules for now */ + return 0; +} +#endif /* CONFIG_PPC64 */ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned 
char *old, *new; + unsigned long ip = rec->ip; /* * If the calling address is more that 24 bits away, * then we had to use a trampoline to make the call. * Otherwise just update the call site. */ - if (test_24bit_addr(rec->ip, addr)) { + if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_nop_replace(); - new = ftrace_call_replace(rec->ip, addr); - return ftrace_modify_code(rec->ip, old, new); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new); } - return 0; +#ifdef CONFIG_PPC64 + /* + * Out of range jumps are called from modules. + * Being that we are converting from nop, it had better + * already have a module defined. + */ + if (!rec->arch.mod) { + printk(KERN_ERR "No module loaded\n"); + return -EINVAL; + } +#endif + + return __ftrace_make_call(rec, addr); } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -180,4 +439,3 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } - diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 1af2377e499..8992b031a7b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } +#ifdef CONFIG_DYNAMIC_FTRACE + /* make the trampoline to the ftrace_caller */ + relocs++; +#endif + DEBUGP("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } +#ifdef CONFIG_DYNAMIC_FTRACE + me->arch.toc = my_r2(sechdrs, me); + me->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + me); +#endif + return 0; } -- cgit v1.2.3-70-g09d2 From 7cc45e64323c8a1042f56e6a8d1dc982f98d52a8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 02:39:05 -0500 Subject: powerpc/ppc32: ftrace, dynamic ftrace to handle modules Impact: add ability to trace 
modules on 32 bit PowerPC This patch performs the necessary trampoline calls to handle modules with dynamic ftrace on 32 bit PowerPC. Signed-off-by: Steven Rostedt --- arch/powerpc/include/asm/module.h | 5 +- arch/powerpc/kernel/ftrace.c | 101 +++++++++++++++++++++++++++++++++++--- arch/powerpc/kernel/module_32.c | 10 ++++ 3 files changed, 109 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 340bc699b62..08454880a2c 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -39,11 +39,14 @@ struct mod_arch_specific { unsigned long tramp; #endif -#else +#else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; unsigned int init_plt_section; +#ifdef CONFIG_DYNAMIC_FTRACE + unsigned long tramp; #endif +#endif /* powerpc64 */ /* List of BUG addresses, source line numbers and filenames */ struct list_head bug_list; diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1aec559bdfc..3271cd698e4 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -274,7 +274,63 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Ignore modules for PPC32 (for now) */ + unsigned char replaced[MCOUNT_INSN_SIZE]; + unsigned int *op = (unsigned *)&replaced; + unsigned char jmp[8]; + unsigned int *ptr = (unsigned int *)&jmp; + unsigned long ip = rec->ip; + unsigned long tramp; + int offset; + + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(*op)) { + printk(KERN_ERR "Not expected bl: opcode is %x\n", *op); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, *op); + + /* + * On PPC32 the trampoline looks like: + * lis r11,sym@ha + * addi r11,r11,sym@l + * mtctr r11 + * bctr + */ + + 
DEBUGP("ip:%lx jumps to %lx", ip, tramp); + + /* Find where the trampoline jumps to */ + if (probe_kernel_read(jmp, (void *)tramp, 8)) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x ", ptr[0], ptr[1]); + + tramp = (ptr[1] & 0xffff) | + ((ptr[0] & 0xffff) << 16); + if (tramp & 0x8000) + tramp -= 0x10000; + + DEBUGP(" %x ", tramp); + + if (tramp != addr) { + printk(KERN_ERR + "Trampoline location %08lx does not match addr\n", + tramp); + return -EINVAL; + } + + op[0] = PPC_NOP_INSTR; + + if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; } #endif /* PPC64 */ @@ -297,7 +353,6 @@ int ftrace_make_nop(struct module *mod, return ftrace_modify_code(ip, old, new); } -#ifdef CONFIG_PPC64 /* * Out of range jumps are called from modules. * We should either already have a pointer to the module @@ -320,7 +375,6 @@ int ftrace_make_nop(struct module *mod, /* nothing to do if mod == rec->arch.mod */ } else mod = rec->arch.mod; -#endif /* CONFIG_PPC64 */ return __ftrace_make_nop(mod, rec, addr); @@ -380,7 +434,44 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - /* PPC32 ignores modules for now */ + unsigned char replaced[MCOUNT_INSN_SIZE]; + unsigned int *op = (unsigned *)&replaced; + unsigned long ip = rec->ip; + unsigned long offset; + + /* read where this goes */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* It should be pointing to a nop */ + if (op[0] != PPC_NOP_INSTR) { + printk(KERN_ERR "Expected NOP but have %x\n", op[0]); + return -EINVAL; + } + + /* If we never set up a trampoline to ftrace_caller, then bail */ + if (!rec->arch.mod->arch.tramp) { + printk(KERN_ERR "No ftrace trampoline\n"); + return -EINVAL; + } + + /* now calculate a jump to the ftrace caller trampoline */ + offset = rec->arch.mod->arch.tramp - ip; + + if (test_offset(offset)) { 
+ printk(KERN_ERR "REL24 %li out of range!\n", + (long int)offset); + return -EINVAL; + } + + /* Set to "bl addr" */ + op[0] = branch_offset(offset); + + DEBUGP("write to %lx\n", rec->ip); + + if (probe_kernel_write((void *)ip, replaced, MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; } #endif /* CONFIG_PPC64 */ @@ -402,7 +493,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(ip, old, new); } -#ifdef CONFIG_PPC64 /* * Out of range jumps are called from modules. * Being that we are converting from nop, it had better @@ -412,7 +502,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) printk(KERN_ERR "No module loaded\n"); return -EINVAL; } -#endif return __ftrace_make_call(rec, addr); } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2df91a03462..f832773fc28 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) r_addend = rela[i].r_addend; } +#ifdef CONFIG_DYNAMIC_FTRACE + _count_relocs++; /* add one for ftrace_caller */ +#endif return _count_relocs; } @@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } +#ifdef CONFIG_DYNAMIC_FTRACE + module->arch.tramp = + do_plt_call(module->module_core, + (unsigned long)ftrace_caller, + sechdrs, module); +#endif return 0; } -- cgit v1.2.3-70-g09d2 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a deeper depth of function calls. But it could be unsafe to store these return addresses to the stack. 
So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated. Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 1 - arch/x86/include/asm/thread_info.h | 29 ------------ arch/x86/kernel/ftrace.c | 29 ++++++------ include/linux/ftrace.h | 5 ++ include/linux/sched.h | 23 +++++---- kernel/exit.c | 5 +- kernel/fork.c | 4 ++ kernel/sched.c | 3 ++ kernel/trace/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++- 9 files changed, 137 insertions(+), 58 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 2bb43b433e0..754a3e082f9 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -29,7 +29,6 @@ struct dyn_arch_ftrace { #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -#define FTRACE_RET_STACK_SIZE 20 #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e90e81ef6ab..0921b4018c1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,36 +40,8 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* Index of current stored adress in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; - /* - * Number of functions that haven't been traced - * because of depth overrun. 
- */ - atomic_t trace_overrun; -#endif }; -#ifdef CONFIG_FUNCTION_RET_TRACER -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .curr_ret_stack = -1,\ - .trace_overrun = ATOMIC_INIT(0) \ -} -#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -82,7 +54,6 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } -#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 356bb1eb6e9..bb137f7297e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time, unsigned long func) { int index; - struct thread_info *ti = current_thread_info(); + + if (!current->ret_stack) + return -EBUSY; /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { - atomic_inc(&ti->trace_overrun); + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(&current->trace_overrun); return -EBUSY; } - index = ++ti->curr_ret_stack; + index = ++current->curr_ret_stack; barrier(); - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; return 0; } @@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, { int index; - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - *overrun = atomic_read(&ti->trace_overrun); - ti->curr_ret_stack--; + index = current->curr_ret_stack; + *ret 
= current->ret_stack[index].ret; + *func = current->ret_stack[index].func; + *time = current->ret_stack[index].calltime; + *overrun = atomic_read(&current->trace_overrun); + current->curr_ret_stack--; } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e12..2ba259b2def 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db46420..bee1e93c95a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called. 
- * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index 35c8ec2ba03..b9d446329da 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1127,7 +1128,9 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_exit_task(tsk); +#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index ac62f43ee43..d1eb30e69cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1269,6 +1270,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(p); +#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 4de56108c86..fb17205950d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,6 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(idle); +#endif } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f212da48668..90d99fb02ae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table 
*table, int write, #ifdef CONFIG_FUNCTION_RET_TRACER +static atomic_t ftrace_retfunc_active; + /* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; + +/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ +static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +{ + int i; + int ret = 0; + unsigned long flags; + int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; + struct task_struct *g, *t; + + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { + ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!ret_stack_list[i]) { + start = 0; + end = i; + ret = -ENOMEM; + goto free; + } + } + + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + if (start == end) { + ret = -EAGAIN; + goto unlock; + } + + if (t->ret_stack == NULL) { + t->ret_stack = ret_stack_list[start++]; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } + } while_each_thread(g, t); + +unlock: + read_unlock_irqrestore(&tasklist_lock, flags); +free: + for (i = start; i < end; i++) + kfree(ret_stack_list[i]); + return ret; +} + +/* Allocate a return stack for each task */ +static int start_return_tracing(void) +{ + struct ftrace_ret_stack **ret_stack_list; + int ret; + + ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * + sizeof(struct ftrace_ret_stack *), + GFP_KERNEL); + + if (!ret_stack_list) + return -ENOMEM; + + do { + ret = alloc_retstack_tasklist(ret_stack_list); + } while (ret == -EAGAIN); + + kfree(ret_stack_list); + return ret; +} + int register_ftrace_return(trace_function_return_t func) { int ret = 0; @@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func) ret = -EBUSY; goto out; } - + atomic_inc(&ftrace_retfunc_active); + ret = start_return_tracing(); + if (ret) { + atomic_dec(&ftrace_retfunc_active); + goto out; + } ftrace_tracing_type = 
FTRACE_TYPE_RETURN; ftrace_function_return = func; ftrace_startup(); @@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void) { mutex_lock(&ftrace_sysctl_lock); + atomic_dec(&ftrace_retfunc_active); ftrace_function_return = (trace_function_return_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ @@ -1537,6 +1610,27 @@ void unregister_ftrace_return(void) mutex_unlock(&ftrace_sysctl_lock); } + +/* Allocate a return stack for newly created task */ +void ftrace_retfunc_init_task(struct task_struct *t) +{ + if (atomic_read(&ftrace_retfunc_active)) { + t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!t->ret_stack) + return; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } else + t->ret_stack = NULL; +} + +void ftrace_retfunc_exit_task(struct task_struct *t) +{ + kfree(t->ret_stack); + t->ret_stack = NULL; +} #endif -- cgit v1.2.3-70-g09d2 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). 
Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 5 ++- arch/x86/kernel/stacktrace.c | 57 +++++++++++++++++++++++++++ include/linux/stacktrace.h | 8 ++++ kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 9 +++++ 5 files changed, 171 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 753f4de4b17..79a80f79c06 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree + noblock nostacktrace nosched-tree nouserstacktrace To disable one of the options, echo in the option prepended with "no". @@ -378,6 +378,9 @@ Here are the available options: When a trace is recorded, so is the stack of functions. This allows for back traces of trace sites. + userstacktrace - This option changes the trace. + It records a stacktrace of the current userspace thread. + sched-tree - TBD (any users??) 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c..b1515306041 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include static void save_stack_warning(void *data, char *msg) @@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) { + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + frame.next_fp = NULL; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.return_address) + trace->entries[trace->nr_entries++] = + frame.return_address; + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5..68de51468f5 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void 
save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f037522..ced8b4fa9f5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -275,6 +275,7 @@ static const char *trace_options[] = { "ftrace_preempt", "branch", "annotate", + "userstacktrace", NULL }; @@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ + struct userstack_entry *entry; + struct stack_trace trace; + struct ring_buffer_event *event; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + static void ftrace_trace_special(void *__tr, void *__data, 
unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + int ret = 1; + unsigned i; + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + ret = trace_seq_puts(s, "??"); + continue; + } + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + } + + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } 
@@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2cb12fd98f6..17bb4c830b0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,6 +26,7 @@ enum trace_type { TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, + TRACE_USER_STACK, __TRACE_LAST_TYPE }; @@ -42,6 +43,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int tgid; }; /* @@ -99,6 +101,11 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; +struct userstack_entry { + struct trace_entry ent; + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * ftrace_printk entry: */ @@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct 
special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ @@ -500,6 +508,7 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, + TRACE_ITER_USERSTACKTRACE = 0x4000 }; /* -- cgit v1.2.3-70-g09d2 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- arch/x86/kernel/stacktrace.c | 51 +++++++++++++++++++++++++------------------- include/linux/stacktrace.h | 2 +- kernel/trace/trace.c | 7 +++--- 3 files changed, 33 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b1515306041..10786af9554 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); struct stack_frame { const void __user *next_fp; - unsigned long return_address; + unsigned long ret_addr; }; static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) @@ -108,33 +108,40 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) return ret; } +static inline void __save_stack_trace_user(struct stack_trace *trace) +{ + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + + frame.next_fp = NULL; + frame.ret_addr = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.ret_addr) { + trace->entries[trace->nr_entries++] = + frame.ret_addr; + } + if (fp == frame.next_fp) + break; + 
fp = frame.next_fp; + } +} + void save_stack_trace_user(struct stack_trace *trace) { /* * Trace user stack if we are not a kernel thread */ if (current->mm) { - const struct pt_regs *regs = task_pt_regs(current); - const void __user *fp = (const void __user *)regs->bp; - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; - - while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; - frame.next_fp = NULL; - frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - if (frame.return_address) - trace->entries[trace->nr_entries++] = - frame.return_address; - if (fp == frame.next_fp) - break; - fp = frame.next_fp; - } + __save_stack_trace_user(trace); } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 68de51468f5..fd42d685110 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62776b71b1c..dedf35f3697 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -948,9 +948,9 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { + struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - struct ring_buffer_event *event; unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) @@ -1471,8 +1471,7 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct 
mm_struct *mm, if (file) { ret = trace_seq_path(s, &file->f_path); if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", - ip - vmstart); + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); @@ -1485,7 +1484,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, { struct mm_struct *mm = NULL; int ret = 1; - unsigned i; + unsigned int i; if (trace_flags & TRACE_ITER_SYM_USEROBJ) { struct task_struct *task; -- cgit v1.2.3-70-g09d2 From 8d26487fd4ddda7a0237da418fb8669fb06ae557 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:08 +0200 Subject: tracing/stack-tracer: introduce CONFIG_USER_STACKTRACE_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup User stack tracing is just implemented for x86, but it is not x86 specific. Introduce a generic config flag, that is currently enabled only for x86. When other arches implement it, they will have to SELECT USER_STACKTRACE_SUPPORT. 
Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + include/linux/stacktrace.h | 2 +- kernel/trace/Kconfig | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7a146baaa99..e49a4fd718f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -36,6 +36,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS + select USER_STACKTRACE_SUPPORT config ARCH_DEFCONFIG string diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index fd42d685110..1a8cecc4f38 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -16,7 +16,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern void print_stack_trace(struct stack_trace *trace, int spaces); -#ifdef CONFIG_X86 +#ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace_user(trace) do { } while (0) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b8378fad29a..87fc34a1bb9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -3,6 +3,9 @@ # select HAVE_FUNCTION_TRACER: # +config USER_STACKTRACE_SUPPORT + bool + config NOP_TRACER bool -- cgit v1.2.3-70-g09d2