From 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 23:12:58 -0400 Subject: tracing: adding function timings to function profiler If the function graph trace is enabled, the function profiler will use it to take the timing of the functions. cat /debug/tracing/trace_stat/functions Function Hit Time -------- --- ---- mwait_idle 127 183028.4 us schedule 26 151997.7 us __schedule 31 151975.1 us sys_wait4 2 74080.53 us do_wait 2 74077.80 us sys_newlstat 138 39929.16 us do_path_lookup 179 39845.79 us vfs_lstat_fd 138 39761.97 us user_path_at 153 39469.58 us path_walk 179 39435.76 us __link_path_walk 189 39143.73 us [...] Note the times are skewed due to the function graph tracer not taking into account schedules. Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e7b3a..85bba0f018b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -426,8 +426,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_HANDLED; } -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) +enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ @@ -464,12 +464,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) if (!ret) return TRACE_TYPE_PARTIAL_LINE; } + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s) +{ + int ret; + + ret = trace_print_graph_duration(duration, s); + if (ret != TRACE_TYPE_HANDLED) + return ret; ret = trace_seq_printf(s, "| "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + return TRACE_TYPE_HANDLED; } /* Case of a leaf function on its call entry */ -- cgit v1.2.3-70-g09d2 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 21 ++++++++++++++++++++- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 8 ++++---- 5 files changed, 29 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d22cf7..9e0a8d245e5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ed1fc5021d4..71e5faef12a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { struct ftrace_profile_stat *stat; + unsigned long long calltime; struct ftrace_profile *rec; unsigned long flags; @@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash) goto out; + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) - rec->time += trace->rettime - trace->calltime; + rec->time += calltime; + out: local_irq_restore(flags); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 821bf49771d..5d1a16cae37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +318,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c66ca3b6605..e3429a8ab05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 85bba0f018b..10f6ad7d85f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); -- cgit v1.2.3-70-g09d2 From 82310a3272d5a2a7652f5649ad8a55f58c8f74d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 12:26:07 -0400 Subject: function-graph: enable the stack after initialization of other variables The function graph tracer checks if the task_struct has ret_stack defined to know if it is OK or not to use it. The initialization is done for all tasks by one process, but the idle tasks use the same initialization used by new tasks. If an interrupt happens on an idle task that just had the ret_stack created, but before the rest of the initialization took place, then we can corrupt the return address of the functions. This patch moves the setting of the task_struct's ret_stack to after the other variables have been initialized. [ Impact: prevent kernel panic on idle task when starting function graph ] Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 9 +++++++-- kernel/trace/trace_functions_graph.c | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ebff62ef40b..20e066065eb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2739,15 +2739,20 @@ void unregister_ftrace_graph(void) void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_graph_active)) { - t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + struct ftrace_ret_stack *ret_stack; + + ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); - if (!t->ret_stack) + if (!ret_stack) return; t->curr_ret_stack = -1; atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; + /* make curr_ret_stack visable before we add the ret_stack */ + smp_wmb(); + t->ret_stack = ret_stack; } else t->ret_stack = NULL; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e7b3a..baeb5fe3610 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) if (!current->ret_stack) return -EBUSY; + /* + * We must make sure the ret_stack is tested before we read + * anything else. + */ + smp_rmb(); + /* The return trace stack is full */ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { atomic_inc(¤t->trace_overrun); -- cgit v1.2.3-70-g09d2 From 71e308a239c098673570d0b417d42262bb535909 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 18 Jun 2009 12:45:08 -0400 Subject: function-graph: add stack frame test In case gcc does something funny with the stack frames, or the return from function code, we would like to detect that. An arch may implement passing of a variable that is unique to the function and can be saved on entering a function and can be tested when exiting the function. Usually the frame pointer can be used for this purpose. This patch also implements this for x86. Where it passes in the stack frame of the parent function, and will test that frame on exit. There was a case in x86_32 with optimize for size (-Os) where, for a few functions, gcc would align the stack frame and place a copy of the return address into it. The function graph tracer modified the copy and not the actual return address. On return from the funtion, it did not go to the tracer hook, but returned to the parent. This broke the function graph tracer, because the return of the parent (where gcc did not do this funky manipulation) returned to the location that the child function was suppose to. This caused strange kernel crashes. This test detected the problem and pointed out where the issue was. This modifies the parameters of one of the functions that the arch specific code calls, so it includes changes to arch code to accommodate the new prototype. Note, I notice that the parsic arch implements its own push_return_trace. This is now a generic function and the ftrace_push_return_trace should be used instead. This patch does not touch that code. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Frederic Weisbecker Cc: Helge Deller Cc: Kyle McMartin Signed-off-by: Steven Rostedt --- arch/powerpc/kernel/ftrace.c | 2 +- arch/s390/kernel/ftrace.c | 2 +- arch/x86/Kconfig | 1 + arch/x86/kernel/entry_32.S | 2 ++ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/ftrace.c | 6 ++++-- include/linux/ftrace.h | 4 +++- kernel/trace/Kconfig | 7 +++++++ kernel/trace/trace_functions_graph.c | 36 ++++++++++++++++++++++++++++++++---- 9 files changed, 53 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace_functions_graph.c') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 2d182f119d1..58d6a610904 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -605,7 +605,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { + if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) { *parent = old; return; } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 82ddfd3a75a..3e298e64f0d 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -190,7 +190,7 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent) goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth) == -EBUSY) + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) goto out; trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN; /* Only trace if the calling function expects to. */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 356d2ec8e2f..fcf12af0742 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,6 +33,7 @@ config X86 select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select HAVE_FTRACE_SYSCALLS diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c929add475c..0d4b28564c1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1154,6 +1154,7 @@ ENTRY(ftrace_graph_caller) pushl %edx movl 0xc(%esp), %edx lea 0x4(%ebp), %eax + movl (%ebp), %ecx subl $MCOUNT_INSN_SIZE, %edx call prepare_ftrace_return popl %edx @@ -1168,6 +1169,7 @@ return_to_handler: pushl %eax pushl %ecx pushl %edx + movl %ebp, %eax call ftrace_return_to_handler movl %eax, 0xc(%esp) popl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index de74f0a3e0e..c251be74510 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller) leaq 8(%rbp), %rdi movq 0x38(%rsp), %rsi + movq (%rbp), %rdx subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -150,6 +151,7 @@ GLOBAL(return_to_handler) /* Save the return values */ movq %rax, (%rsp) movq %rdx, 8(%rsp) + movq %rbp, %rdi call ftrace_return_to_handler diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b79c5533c42..d94e1ea3b9f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void) * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) { unsigned long old; int faulted; @@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { + if (ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer) == -EBUSY) { *parent = old; return; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 39b95c56587..dc3b1328aae 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -362,6 +362,7 @@ struct ftrace_ret_stack { unsigned long func; unsigned long long calltime; unsigned long long subtime; + unsigned long fp; }; /* @@ -372,7 +373,8 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, + unsigned long frame_pointer); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1eac85253ce..b17ed8787de 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -18,6 +18,13 @@ config HAVE_FUNCTION_TRACER config HAVE_FUNCTION_GRAPH_TRACER bool +config HAVE_FUNCTION_GRAPH_FP_TEST + bool + help + An arch may pass in a unique value (frame pointer) to both the + entering and exiting of a function. On exit, the value is compared + and if it does not match, then it will panic the kernel. + config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool help diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 8b592418d8b..d2249abafb5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -57,7 +57,8 @@ static struct tracer_flags tracer_flags = { /* Add a function return address to the trace stack on thread info.*/ int -ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) +ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth, + unsigned long frame_pointer) { unsigned long long calltime; int index; @@ -85,6 +86,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; current->ret_stack[index].subtime = 0; + current->ret_stack[index].fp = frame_pointer; *depth = index; return 0; @@ -92,7 +94,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) /* Retrieve a function return address to the trace stack on thread info.*/ static void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, + unsigned long frame_pointer) { int index; @@ -106,6 +109,31 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) return; } +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST + /* + * The arch may choose to record the frame pointer used + * and check it here to make sure that it is what we expect it + * to be. If gcc does not set the place holder of the return + * address in the frame pointer, and does a copy instead, then + * the function graph trace will fail. This test detects this + * case. + * + * Currently, x86_32 with optimize for size (-Os) makes the latest + * gcc do the above. + */ + if (unlikely(current->ret_stack[index].fp != frame_pointer)) { + ftrace_graph_stop(); + WARN(1, "Bad frame pointer: expected %lx, received %lx\n" + " from func %pF return to %lx\n", + current->ret_stack[index].fp, + frame_pointer, + (void *)current->ret_stack[index].func, + current->ret_stack[index].ret); + *ret = (unsigned long)panic; + return; + } +#endif + *ret = current->ret_stack[index].ret; trace->func = current->ret_stack[index].func; trace->calltime = current->ret_stack[index].calltime; @@ -117,12 +145,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) * Send the trace to the ring-buffer. * @return the original return address. */ -unsigned long ftrace_return_to_handler(void) +unsigned long ftrace_return_to_handler(unsigned long frame_pointer) { struct ftrace_graph_ret trace; unsigned long ret; - ftrace_pop_return_trace(&trace, &ret); + ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); barrier(); -- cgit v1.2.3-70-g09d2