From 9c0729dc8062bed96189bd14ac6d4920f3958743 Mon Sep 17 00:00:00 2001 From: Soeren Sandmann Pedersen Date: Fri, 5 Nov 2010 05:59:39 -0400 Subject: x86: Eliminate bp argument from the stack tracing routines The various stack tracing routines take a 'bp' argument in which the caller is supposed to provide the base pointer to use, or 0 if doesn't have one. Since bp is garbage whenever CONFIG_FRAME_POINTER is not defined, this means all callers in principle should either always pass 0, or be conditional on CONFIG_FRAME_POINTER. However, there are only really three use cases for stack tracing: (a) Trace the current task, including IRQ stack if any (b) Trace the current task, but skip IRQ stack (c) Trace some other task In all cases, if CONFIG_FRAME_POINTER is not defined, bp should just be 0. If it _is_ defined, then - in case (a) bp should be gotten directly from the CPU's register, so the caller should pass NULL for regs, - in case (b) the caller should should pass the IRQ registers to dump_trace(), - in case (c) bp should be gotten from the top of the task's stack, so the caller should pass NULL for regs. Hence, the bp argument is not necessary because the combination of task and regs is sufficient to determine an appropriate value for bp. This patch introduces a new inline function stack_frame(task, regs) that computes the desired bp. This function is then called from the two versions of dump_stack(). Signed-off-by: Soren Sandmann Acked-by: Steven Rostedt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Arjan van de Ven , Cc: Frederic Weisbecker , Cc: Arnaldo Carvalho de Melo , LKML-Reference: > Signed-off-by: Frederic Weisbecker --- include/linux/stacktrace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 51efbef38fb..25310f1d7f3 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -2,6 +2,7 @@ #define __LINUX_STACKTRACE_H struct task_struct; +struct pt_regs; #ifdef CONFIG_STACKTRACE struct task_struct; @@ -13,7 +14,8 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); -extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); +extern void save_stack_trace_regs(struct stack_trace *trace, + struct pt_regs *regs); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); -- cgit v1.2.3-70-g09d2 From 61c32659b12c44e62de32fbf99f7e4ca783dc38b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:39:17 +0100 Subject: tracing: New flag to allow non privileged users to use a trace event This adds a new trace event internal flag that allows them to be used in perf by non privileged users in case of task bound tracing. This is desired for syscalls tracepoint because they don't leak global system informations, like some other tracepoints. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 2 ++ kernel/perf_event.c | 9 --------- kernel/trace/trace_event_perf.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8beabb958f6..312dce7e0d5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -154,12 +154,14 @@ enum { TRACE_EVENT_FL_ENABLED_BIT, TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, + TRACE_EVENT_FL_CAP_ANY_BIT, }; enum { TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), + TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), }; struct ftrace_event_call { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f498..ee1e903f983 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4747,15 +4747,6 @@ static int perf_tp_event_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; - /* - * Raw tracepoint data is a severe data leak, only allow root to - * have these. - */ - if ((event->attr.sample_type & PERF_SAMPLE_RAW) && - perf_paranoid_tracepoint_raw() && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - err = perf_trace_init(event); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 39c059ca670..19a359d5e6d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; +static int perf_trace_event_perm(struct ftrace_event_call *tp_event, + struct perf_event *p_event) +{ + /* No tracing, just counting, so no obvious leak */ + if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) + return 0; + + /* Some events are ok to be traced by non-root users... */ + if (p_event->attach_state == PERF_ATTACH_TASK) { + if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY) + return 0; + } + + /* + * ...otherwise raw tracepoint data can be a severe data leak, + * only allow root to have these. + */ + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; - int ret = -ENOMEM; + int ret; int cpu; + ret = perf_trace_event_perm(tp_event, p_event); + if (ret) + return ret; + p_event->tp_event = tp_event; if (tp_event->perf_refcount++ > 0) return 0; + ret = -ENOMEM; + list = alloc_percpu(struct hlist_head); if (!list) goto fail; -- cgit v1.2.3-70-g09d2 From 1ed0c5971159974185653170543a764cc061c857 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:46:57 +0100 Subject: tracing: New macro to set up initial event flags value This introduces the new TRACE_EVENT_FLAGS() macro in order to set up initial event flags value. This macro must simply follow the definition of a trace event and take the event name and the flag value as parameters: TRACE_EVENT(my_event, ..... .... ); TRACE_EVENT_FLAGS(my_event, 1) This will set up 1 as the initial my_event->flags value. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/tracepoint.h | 4 ++++ include/trace/ftrace.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a4a90b6726c..5a6074fcd81 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -234,6 +234,8 @@ do_trace: \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -354,4 +356,6 @@ do_trace: \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FLAGS(event, flag) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a9377c0083a..6f540123d43 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -82,6 +82,15 @@ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -129,6 +138,9 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) +#undef TRACE_EVENT_FLAGS +#define TRACE_EVENT_FLAGS(event, flag) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* -- cgit v1.2.3-70-g09d2 From fe5542030dce3b951f9eaf3ecb9a7bc5fa7bfed1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 01:52:06 +0100 Subject: tracing: Allow raw syscall trace events for non privileged users This allows non privileged users to use the raw syscall trace events for task bound tracing in perf. It is safe because raw syscall trace events don't leak system wide informations. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/trace/events/syscalls.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index fb726ac7cae..5a4c04a75b3 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter, syscall_regfunc, syscall_unregfunc ); +TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY) + TRACE_EVENT_FN(sys_exit, TP_PROTO(struct pt_regs *regs, long ret), @@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit, syscall_regfunc, syscall_unregfunc ); +TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY) + #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ #endif /* _TRACE_EVENTS_SYSCALLS_H */ -- cgit v1.2.3-70-g09d2 From 53cf810b1934f08a68e131aeeb16267a778f43df Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:11:42 +0100 Subject: tracing: Allow syscall trace events for non privileged users As for the raw syscalls events, individual syscall events won't leak system wide information on task bound tracing. Allow non privileged users to use them in such workflow. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/ftrace_event.h | 8 ++++++++ include/linux/syscalls.h | 6 ++++-- include/trace/ftrace.h | 7 +------ 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 312dce7e0d5..725bf6bd39f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -198,6 +198,14 @@ struct ftrace_event_call { #endif }; +#define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ + event_##name.flags = value; \ + return 0; \ + } \ + early_initcall(trace_init_flags_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cacc27a0e28..13b9731d30c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -137,7 +137,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ @@ -152,7 +153,8 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ - } + }; \ + __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6f540123d43..e718a917d89 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -84,12 +84,7 @@ #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(name, value) \ - static int __init trace_init_flags_##name(void) \ - { \ - event_##name.flags = value; \ - return 0; \ - } \ - early_initcall(trace_init_flags_##name); + __TRACE_EVENT_FLAGS(name, value) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3-70-g09d2 From 423478cde453eebdfcfebf4b8d378d8f5d49b853 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Nov 2010 02:21:26 +0100 Subject: tracing: Remove useless syscall ftrace_event_call declaration It is defined right after, which makes the declaration completely useless. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron --- include/linux/syscalls.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 13b9731d30c..18cd0684fc4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_enter_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -143,8 +141,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ - static struct ftrace_event_call \ - __attribute__((__aligned__(4))) event_exit_##sname; \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ -- cgit v1.2.3-70-g09d2