From 884bfe89a462fcc85c8abd96171519cf2fe70929 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Fri, 15 Jul 2011 14:23:58 -0700 Subject: ring-buffer: Add a 'dropped events' counter The existing 'overrun' counter is incremented when the ring buffer wraps around, with overflow on (the default). We wanted a way to count requests lost from the buffer filling up with overflow off, too. I decided to add a new counter instead of retro-fitting the existing one because it seems like a different statistic to count conceptually, and also because of how the code was structured. Link: http://lkml.kernel.org/r/1310765038-26399-1-git-send-email-slavapestov@google.com Signed-off-by: Slava Pestov Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 6c8835f74f7..2007375cfe7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -166,6 +166,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3-70-g09d2 From 01e3e710a9265fb7092efd67243d7b6dd6e2548a Mon Sep 17 00:00:00 2001 From: David Sharp Date: Thu, 7 Jun 2012 16:46:24 -0700 Subject: tracing: Trivial cleanup Remove ftrace_format_syscall() declaration; it is neither defined nor used. Also update a comment and formatting. 
Link: http://lkml.kernel.org/r/1339112785-21806-1-git-send-email-vnagarnaik@google.com Signed-off-by: David Sharp Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- include/trace/syscall.h | 2 -- kernel/trace/ring_buffer.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 31966a4fb8c..0c95796177d 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,8 +39,6 @@ extern int reg_event_syscall_enter(struct ftrace_event_call *call); extern void unreg_event_syscall_enter(struct ftrace_event_call *call); extern int reg_event_syscall_exit(struct ftrace_event_call *call); extern void unreg_event_syscall_exit(struct ftrace_event_call *call); -extern int -ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags, struct trace_event *event); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ebeb1d76dd..23a384b9251 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1821,7 +1821,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) } /** - * ring_buffer_update_event - update event type and data + * rb_update_event - update event type and data * @event: the even to update * @type: the type of event * @length: the size of the event field in the ring buffer @@ -2723,8 +2723,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * and not the length of the event which would hold the header. 
*/ int ring_buffer_write(struct ring_buffer *buffer, - unsigned long length, - void *data) + unsigned long length, + void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; -- cgit v1.2.3-70-g09d2 From 6f86ab9fcaef122abb837819139eadac1a0ca966 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Thu, 7 Jun 2012 16:46:25 -0700 Subject: tracing: Cleanup unnecessary function declarations The functions defined in include/trace/syscalls.h are not used directly since struct ftrace_event_class was introduced. Remove them from the header file and rearrange the ftrace_event_class declarations in trace_syscalls.c. Link: http://lkml.kernel.org/r/1339112785-21806-2-git-send-email-vnagarnaik@google.com Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt --- include/trace/syscall.h | 21 --------------- kernel/trace/trace_syscalls.c | 61 ++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 0c95796177d..84bc4197e73 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -31,25 +31,4 @@ struct syscall_metadata { struct ftrace_event_call *exit_event; }; -#ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long arch_syscall_addr(int nr); -extern int init_syscall_trace(struct ftrace_event_call *call); - -extern int reg_event_syscall_enter(struct ftrace_event_call *call); -extern void unreg_event_syscall_enter(struct ftrace_event_call *call); -extern int reg_event_syscall_exit(struct ftrace_event_call *call); -extern void unreg_event_syscall_exit(struct ftrace_event_call *call); -enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags, - struct trace_event *event); -enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags, - struct trace_event *event); -#endif - -#ifdef CONFIG_PERF_EVENTS -int perf_sysenter_enable(struct ftrace_event_call *call); -void 
perf_sysenter_disable(struct ftrace_event_call *call); -int perf_sysexit_enable(struct ftrace_event_call *call); -void perf_sysexit_disable(struct ftrace_event_call *call); -#endif - #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 2485a7d09b1..7609dd6714c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -21,9 +21,6 @@ static int syscall_enter_register(struct ftrace_event_call *event, static int syscall_exit_register(struct ftrace_event_call *event, enum trace_reg type, void *data); -static int syscall_enter_define_fields(struct ftrace_event_call *call); -static int syscall_exit_define_fields(struct ftrace_event_call *call); - static struct list_head * syscall_get_enter_fields(struct ftrace_event_call *call) { @@ -32,30 +29,6 @@ syscall_get_enter_fields(struct ftrace_event_call *call) return &entry->enter_fields; } -struct trace_event_functions enter_syscall_print_funcs = { - .trace = print_syscall_enter, -}; - -struct trace_event_functions exit_syscall_print_funcs = { - .trace = print_syscall_exit, -}; - -struct ftrace_event_class event_class_syscall_enter = { - .system = "syscalls", - .reg = syscall_enter_register, - .define_fields = syscall_enter_define_fields, - .get_fields = syscall_get_enter_fields, - .raw_init = init_syscall_trace, -}; - -struct ftrace_event_class event_class_syscall_exit = { - .system = "syscalls", - .reg = syscall_exit_register, - .define_fields = syscall_exit_define_fields, - .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), - .raw_init = init_syscall_trace, -}; - extern struct syscall_metadata *__start_syscalls_metadata[]; extern struct syscall_metadata *__stop_syscalls_metadata[]; @@ -432,7 +405,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -int init_syscall_trace(struct ftrace_event_call *call) +static int init_syscall_trace(struct ftrace_event_call *call) { int id; int num; 
@@ -457,6 +430,30 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } +struct trace_event_functions enter_syscall_print_funcs = { + .trace = print_syscall_enter, +}; + +struct trace_event_functions exit_syscall_print_funcs = { + .trace = print_syscall_exit, +}; + +struct ftrace_event_class event_class_syscall_enter = { + .system = "syscalls", + .reg = syscall_enter_register, + .define_fields = syscall_enter_define_fields, + .get_fields = syscall_get_enter_fields, + .raw_init = init_syscall_trace, +}; + +struct ftrace_event_class event_class_syscall_exit = { + .system = "syscalls", + .reg = syscall_exit_register, + .define_fields = syscall_exit_define_fields, + .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), + .raw_init = init_syscall_trace, +}; + unsigned long __init __weak arch_syscall_addr(int nr) { return (unsigned long)sys_call_table[nr]; @@ -537,7 +534,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -int perf_sysenter_enable(struct ftrace_event_call *call) +static int perf_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -558,7 +555,7 @@ int perf_sysenter_enable(struct ftrace_event_call *call) return ret; } -void perf_sysenter_disable(struct ftrace_event_call *call) +static void perf_sysenter_disable(struct ftrace_event_call *call) { int num; @@ -615,7 +612,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -int perf_sysexit_enable(struct ftrace_event_call *call) +static int perf_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -636,7 +633,7 @@ int perf_sysexit_enable(struct ftrace_event_call *call) return ret; } -void perf_sysexit_disable(struct ftrace_event_call *call) +static void perf_sysexit_disable(struct ftrace_event_call *call) { int num; -- cgit v1.2.3-70-g09d2 From 
60efc15ae96c7aace8060411b0d5add20e1ab21e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 25 Oct 2012 15:41:51 +0200 Subject: linux/kernel.h: Remove duplicate trace_printk declaration !CONFIG_TRACING both declares and defines (empty) trace_printk. The first one is redundant so it can be removed. Link: http://lkml.kernel.org/r/1351172511-18125-1-git-send-email-mhocko@suse.cz Signed-off-by: Michal Hocko Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index a123b13b70f..7785d5df6d8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -527,9 +527,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); #else -static inline __printf(1, 2) -int trace_printk(const char *fmt, ...); - static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } @@ -539,8 +536,8 @@ static inline void tracing_on(void) { } static inline void tracing_off(void) { } static inline int tracing_is_on(void) { return 0; } -static inline int -trace_printk(const char *fmt, ...) +static inline __printf(1, 2) +int trace_printk(const char *fmt, ...) { return 0; } -- cgit v1.2.3-70-g09d2 From 50ecf2c3afead23a05227ab004e4212eca08c207 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Thu, 11 Oct 2012 16:27:54 -0700 Subject: ring-buffer: Change unsigned long type of ring_buffer_oldest_event_ts() to u64 ring_buffer_oldest_event_ts() should return a value of u64 type, because ring_buffer_per_cpu->buffer_page->buffer_data_page->time_stamp is u64 type. 
Link: http://lkml.kernel.org/r/1349998076-15495-5-git-send-email-dhsharp@google.com Cc: Frederic Weisbecker Cc: Vaibhav Nagarnaik Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2007375cfe7..519777e3fa0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -159,7 +159,7 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer); void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 23a384b9251..3c7834c24e5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2932,12 +2932,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. 
*/ -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; - unsigned long ret; + u64 ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; -- cgit v1.2.3-70-g09d2 From 0d5c6e1c19bab82fad4837108c2902f557d62a04 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 Nov 2012 20:54:21 -0400 Subject: tracing: Use irq_work for wake ups and remove *_nowake_*() functions Have the ring buffer commit function use the irq_work infrastructure to wake up any waiters waiting on the ring buffer for new data. The irq_work was created for such a purpose, where doing the actual wake up at the time of adding data is too dangerous, as an event or function trace may be in the midst of the work queue locks and cause deadlocks. The irq_work will either delay the action to the next timer interrupt, or trigger an IPI to itself forcing an interrupt to do the work (in a safe location). With irq_work, all ring buffer commits can safely do wakeups, removing the need for the ring buffer commit "nowake" variants, which were used by events and function tracing. All commits can now safely use the normal commit, and the "nowake" variants can be removed. 
Cc: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 14 ++--- include/trace/ftrace.h | 3 +- kernel/trace/Kconfig | 1 + kernel/trace/trace.c | 121 +++++++++++++++++++++----------------- kernel/trace/trace.h | 5 -- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_kprobe.c | 8 +-- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_selftest.c | 1 + 9 files changed, 84 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 642928cf57b..b80c8ddfbbd 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -127,13 +127,13 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs); +void trace_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned long flags, int pc, + struct pt_regs *regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a763888a36f..698f2a89032 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -545,8 +545,7 @@ ftrace_raw_event_##call(void *__data, proto) \ { assign; } \ \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ - trace_nowake_buffer_unlock_commit(buffer, \ - event, irq_flags, pc); \ + trace_buffer_unlock_commit(buffer, event, irq_flags, 
pc); \ } /* * The ftrace_test_probe is compiled out, it is only here as a build time check diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4cea4f41c1d..5d89335a485 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -119,6 +119,7 @@ config TRACING select BINARY_PRINTF select EVENT_TRACING select TRACE_CLOCK + select IRQ_WORK config GENERIC_TRACER bool diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d5cbc0d3f20..37d1c703e3e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,14 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set) */ static DEFINE_PER_CPU(bool, trace_cmdline_save); +/* + * When a reader is waiting for data, then this variable is + * set to true. + */ +static bool trace_wakeup_needed; + +static struct irq_work trace_work_wakeup; + /* * Kill all tracing for good (never come back). * It is initialized to 1 but will turn to zero if the initialization @@ -329,12 +338,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | static int trace_stop_count; static DEFINE_RAW_SPINLOCK(tracing_start_lock); -static void wakeup_work_handler(struct work_struct *work) +/** + * trace_wake_up - wake up tasks waiting for trace input + * + * Schedules a delayed work to wake up any task that is blocked on the + * trace_wait queue. These is used with trace_poll for tasks polling the + * trace. + */ +static void trace_wake_up(struct irq_work *work) { - wake_up(&trace_wait); -} + wake_up_all(&trace_wait); -static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); +} /** * tracing_on - enable tracing buffers @@ -389,22 +404,6 @@ int tracing_is_on(void) } EXPORT_SYMBOL_GPL(tracing_is_on); -/** - * trace_wake_up - wake up tasks waiting for trace input - * - * Schedules a delayed work to wake up any task that is blocked on the - * trace_wait queue. 
These is used with trace_poll for tasks polling the - * trace. - */ -void trace_wake_up(void) -{ - const unsigned long delay = msecs_to_jiffies(2); - - if (trace_flags & TRACE_ITER_BLOCK) - return; - schedule_delayed_work(&wakeup_work, delay); -} - static int __init set_buf_size(char *str) { unsigned long buf_size; @@ -753,6 +752,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ +static void default_wait_pipe(struct trace_iterator *iter) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + trace_wakeup_needed = true; + + if (trace_empty(iter)) + schedule(); + + finish_wait(&trace_wait, &wait); +} + /** * register_tracer - register a tracer with the ftrace system. 
* @type - the plugin for the tracer @@ -1156,30 +1189,32 @@ void __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) { __this_cpu_write(trace_cmdline_save, true); + if (trace_wakeup_needed) { + trace_wakeup_needed = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&trace_work_wakeup); + } ring_buffer_unlock_commit(buffer, event); } static inline void __trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, - unsigned long flags, int pc, - int wake) + unsigned long flags, int pc) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); ftrace_trace_userstack(buffer, flags, pc); - - if (wake) - trace_wake_up(); } void trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, @@ -1196,29 +1231,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc) { - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); + __trace_buffer_unlock_commit(buffer, event, flags, pc); } EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); -void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) -{ - __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); -} -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); - -void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc, - struct pt_regs *regs) +void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, + struct ring_buffer_event *event, + unsigned 
long flags, int pc, + struct pt_regs *regs) { __buffer_unlock_commit(buffer, event); ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); +EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); void trace_current_buffer_discard_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) @@ -3354,19 +3381,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) } } - -void default_wait_pipe(struct trace_iterator *iter) -{ - DEFINE_WAIT(wait); - - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); - - if (trace_empty(iter)) - schedule(); - - finish_wait(&trace_wait, &wait); -} - /* * This is a make-shift waitqueue. * A tracer might use this callback on some rare cases: @@ -5107,6 +5121,7 @@ __init static int tracer_alloc_buffers(void) #endif trace_init_cmdlines(); + init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); current_trace = &nop_trace; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3e8a176f64e..55010ed175f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -327,7 +327,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); -void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); void tracing_reset_current(int cpu); @@ -349,9 +348,6 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -370,7 +366,6 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void 
default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); void ftrace(struct trace_array *tr, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index cb2df3b70f7..880073d0b94 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1760,7 +1760,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); out: atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5a3c533ef06..1865d5f7653 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -751,8 +751,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Kretprobe handler */ @@ -784,8 +784,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) - trace_nowake_buffer_unlock_commit_regs(buffer, event, - irq_flags, pc, regs); + trace_buffer_unlock_commit_regs(buffer, event, + irq_flags, pc, regs); } /* Event entry printers */ diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index b0a136ac382..3374c792ccd 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -102,7 +102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); if (!filter_check_discard(call, entry, buffer, event)) - 
trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); + trace_buffer_unlock_commit(buffer, event, flags, pc); } static void diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 091b815f7b0..47623169a81 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1094,6 +1094,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); + printk("ret = %d\n", ret); if (!ret) ret = trace_test_buffer(&max_tr, &count); -- cgit v1.2.3-70-g09d2 From 19f5ee2716373519fda2129e9333f4c3847aa742 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 28 Oct 2012 18:14:14 +0100 Subject: uprobes: Kill arch_uprobe_enable/disable_step() hooks Kill arch_uprobe_enable/disable_step() hooks, they do nothing and nobody needs them. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- include/linux/uprobes.h | 2 -- kernel/events/uprobes.c | 10 ---------- 2 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 24594571c5a..2615c4d7788 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -101,8 +101,6 @@ extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); -extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch); -extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index abbfd8440a6..39c75cc51ef 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ 
-1430,14 +1430,6 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) return uprobe; } -void __weak arch_uprobe_enable_step(struct arch_uprobe *arch) -{ -} - -void __weak arch_uprobe_disable_step(struct arch_uprobe *arch) -{ -} - /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -1491,7 +1483,6 @@ static void handle_swbp(struct pt_regs *regs) goto out; if (!pre_ssout(uprobe, regs, bp_vaddr)) { - arch_uprobe_enable_step(&uprobe->arch); utask->active_uprobe = uprobe; utask->state = UTASK_SSTEP; return; @@ -1523,7 +1514,6 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) else WARN_ON_ONCE(1); - arch_uprobe_disable_step(&uprobe->arch); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; -- cgit v1.2.3-70-g09d2 From 8cbd9cc6254065c97c4bac42daa55ba1abe73a8e Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:21 -0800 Subject: tracing,x86: Add a TSC trace_clock In order to promote interoperability between userspace tracers and ftrace, add a trace_clock that reports raw TSC values which will then be recorded in the ring buffer. Userspace tracers that also record TSCs are then on exactly the same time base as the kernel and events can be unambiguously interlaced. Tested: Enabled a tracepoint and the "tsc" trace_clock and saw very large timestamp values. v2: Move arch-specific bits out of generic code. v3: Rename "x86-tsc", cleanups v7: Generic arch bits in Kbuild. Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-1-git-send-email-dhsharp@google.com Acked-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- arch/alpha/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/trace_clock.h | 20 ++++++++++++++++++++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/trace_clock.c | 21 +++++++++++++++++++++ arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/trace_clock.h | 16 ++++++++++++++++ include/linux/trace_clock.h | 2 ++ kernel/trace/trace.c | 1 + 33 files changed, 88 insertions(+) create mode 100644 arch/x86/include/asm/trace_clock.h create mode 100644 arch/x86/kernel/trace_clock.c create mode 100644 include/asm-generic/trace_clock.h (limited to 'include') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 64ffc9e9e54..dcfabb9f05a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -11,3 +11,4 @@ header-y += reg.h header-y += regdef.h header-y += sysinfo.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index f70ae175a3d..514e398f1a0 100644 --- a/arch/arm/include/asm/Kbuild +++ 
b/arch/arm/include/asm/Kbuild @@ -31,5 +31,6 @@ generic-y += sockios.h generic-y += termbits.h generic-y += termios.h generic-y += timex.h +generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index a581a220593..6e9ca462127 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -43,6 +43,7 @@ generic-y += swab.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 4807ded352c..4dd4f78d3dc 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 5a0625aad6a..27d70759474 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -38,6 +38,7 @@ generic-y += statfs.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 112a496d835..eae7b5963e8 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -49,6 +49,7 @@ generic-y += termbits.h generic-y += termios.h generic-y += tlbflush.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += user.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 6d43a951b5e..15a122c3767 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -11,3 +11,4 @@ header-y += sync_serial.h generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git 
a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 4a159da2363..c5d76702830 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 50bbf387b2f..4bc8ae73e08 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 3bfa9b30f44..bdb54ceb53b 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -48,6 +48,7 @@ generic-y += stat.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index dd02f09b6ed..05b03ecd793 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += kvm_para.h +generic-y += trace_clock.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 50bbf387b2f..4bc8ae73e08 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm generic-y += clkdev.h generic-y += exec.h generic-y += module.h +generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 88fa3ac86fa..7f1949c0e08 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += sections.h generic-y += siginfo.h generic-y += statfs.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h diff 
--git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 8653072d7e9..2957fcc7176 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm header-y += elf.h generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 533053d12ce..9b54b7a403d 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1 +1,2 @@ # MIPS headers +generic-y += trace_clock.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 4a159da2363..c5d76702830 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += trace_clock.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 78de6805268..8971026e1c6 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -60,6 +60,7 @@ generic-y += swab.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += user.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index bac8debecff..ff4c9faed54 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \ div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ poll.h xor.h clkdev.h exec.h +generic-y += trace_clock.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index a4fe15e33c6..2d62b484b3f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += rwsem.h +generic-y += 
trace_clock.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 0633dc6d254..f313f9cbcf4 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += clkdev.h +generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index ec697aeefd0..16e41fe1a41 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -3,3 +3,4 @@ include include/asm-generic/Kbuild.asm header-y += generic-y += clkdev.h +generic-y += trace_clock.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 29f83beeef7..280bea9e5e2 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -31,5 +31,6 @@ generic-y += socket.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h +generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 645a58da0e8..e26d430ce2f 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -8,4 +8,5 @@ generic-y += local64.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 6948015e08a..b17b9b8e53c 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -34,5 +34,6 @@ generic-y += sockios.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h +generic-y += trace_clock.h generic-y += types.h generic-y += xor.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 0f6e7b32826..b30f34a7988 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h generic-y += ftrace.h pci.h 
io.h param.h delay.h mutex.h current.h exec.h generic-y += switch_to.h clkdev.h +generic-y += trace_clock.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index c910c9857e1..7be503e4569 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -54,6 +54,7 @@ generic-y += syscalls.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h new file mode 100644 index 00000000000..5c1652728b6 --- /dev/null +++ b/arch/x86/include/asm/trace_clock.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_TRACE_CLOCK_H +#define _ASM_X86_TRACE_CLOCK_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#ifdef CONFIG_X86_TSC + +extern u64 notrace trace_clock_x86_tsc(void); + +# define ARCH_TRACE_CLOCKS \ + { trace_clock_x86_tsc, "x86-tsc" }, + +#else /* !CONFIG_X86_TSC */ + +#define ARCH_TRACE_CLOCKS + +#endif + +#endif /* _ASM_X86_TRACE_CLOCK_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9fd5eed3f8f..34e923a5376 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c new file mode 100644 index 00000000000..25b993729f9 --- /dev/null +++ b/arch/x86/kernel/trace_clock.c @@ -0,0 +1,21 @@ +/* + * X86 trace clocks + */ +#include <asm/trace_clock.h> +#include <asm/barrier.h> +#include <asm/msr.h> + +/* + * trace_clock_x86_tsc(): A clock that is just the cycle counter.
+ * + * Unlike the other clocks, this is not in nanoseconds. + */ +u64 notrace trace_clock_x86_tsc(void) +{ + u64 ret; + + rdtsc_barrier(); + rdtscll(ret); + + return ret; +} diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 6d130278999..095f0a2244f 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -25,4 +25,5 @@ generic-y += siginfo.h generic-y += statfs.h generic-y += termios.h generic-y += topology.h +generic-y += trace_clock.h generic-y += xor.h diff --git a/include/asm-generic/trace_clock.h b/include/asm-generic/trace_clock.h new file mode 100644 index 00000000000..6726f1bafb5 --- /dev/null +++ b/include/asm-generic/trace_clock.h @@ -0,0 +1,16 @@ +#ifndef _ASM_GENERIC_TRACE_CLOCK_H +#define _ASM_GENERIC_TRACE_CLOCK_H +/* + * Arch-specific trace clocks. + */ + +/* + * Additional trace clocks added to the trace_clocks + * array in kernel/trace/trace.c + * None if the architecture has not defined it. + */ +#ifndef ARCH_TRACE_CLOCKS +# define ARCH_TRACE_CLOCKS +#endif + +#endif /* _ASM_GENERIC_TRACE_CLOCK_H */ diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h index 4eb490237d4..d563f37e1a1 100644 --- a/include/linux/trace_clock.h +++ b/include/linux/trace_clock.h @@ -12,6 +12,8 @@ #include <linux/compiler.h> #include <linux/types.h> +#include <asm/trace_clock.h> + extern u64 notrace trace_clock_local(void); extern u64 notrace trace_clock(void); extern u64 notrace trace_clock_global(void); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c1434b5ce4d..0d20620c0d2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -488,6 +488,7 @@ static struct { { trace_clock_local, "local" }, { trace_clock_global, "global" }, { trace_clock_counter, "counter" }, + ARCH_TRACE_CLOCKS }; int trace_clock_id; -- cgit v1.2.3-70-g09d2 From 8be0709f10e3dd5d7d07933ad61a9f18c4b93ca5 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Tue, 13 Nov 2012 12:18:22 -0800 Subject: tracing: Format non-nanosec times from tsc clock without a
decimal point. With the addition of the "tsc" clock, formatting timestamps to look like fractional seconds is misleading. Mark clocks as either in nanoseconds or not, and format non-nanosecond timestamps as decimal integers. Tested: $ cd /sys/kernel/debug/tracing/ $ cat trace_clock [local] global tsc $ echo sched_switch > set_event $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ cat trace <idle>-0 [000] 6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 [000] 6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo 1 > options/latency-format $ cat trace <idle>-0 0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120 sleep-29964 0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... $ echo tsc > trace_clock $ cat trace $ echo 1 > tracing_on ; sleep 0.0005 ; echo 0 > tracing_on $ echo 0 > options/latency-format $ cat trace <idle>-0 [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... echo 1 > options/latency-format $ cat trace <idle>-0 0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120 sleep-31128 0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120 ... v2: Move arch-specific bits out of generic code. v4: Fix x86_32 build due to 64-bit division.
Google-Bug-Id: 6980623 Link: http://lkml.kernel.org/r/1352837903-32191-2-git-send-email-dhsharp@google.com Cc: Masami Hiramatsu Signed-off-by: David Sharp Signed-off-by: Steven Rostedt --- arch/x86/include/asm/trace_clock.h | 2 +- include/linux/ftrace_event.h | 6 +++ kernel/trace/trace.c | 15 ++++++-- kernel/trace/trace.h | 4 -- kernel/trace/trace_output.c | 78 ++++++++++++++++++++++++++------------ 5 files changed, 72 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h index 5c1652728b6..beab86cc282 100644 --- a/arch/x86/include/asm/trace_clock.h +++ b/arch/x86/include/asm/trace_clock.h @@ -9,7 +9,7 @@ extern u64 notrace trace_clock_x86_tsc(void); # define ARCH_TRACE_CLOCKS \ - { trace_clock_x86_tsc, "x86-tsc" }, + { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 }, #else /* !CONFIG_X86_TSC */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b80c8ddfbbd..a3d489531d8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -86,6 +86,12 @@ struct trace_iterator { cpumask_var_t started; }; +enum trace_iter_flags { + TRACE_FILE_LAT_FMT = 1, + TRACE_FILE_ANNOTATE = 2, + TRACE_FILE_TIME_IN_NS = 4, +}; + struct trace_event; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0d20620c0d2..d943e69569c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -484,10 +484,11 @@ static const char *trace_options[] = { static struct { u64 (*func)(void); const char *name; + int in_ns; /* is this clock in nanoseconds? 
*/ } trace_clocks[] = { - { trace_clock_local, "local" }, - { trace_clock_global, "global" }, - { trace_clock_counter, "counter" }, + { trace_clock_local, "local", 1 }, + { trace_clock_global, "global", 1 }, + { trace_clock_counter, "counter", 0 }, ARCH_TRACE_CLOCKS }; @@ -2478,6 +2479,10 @@ __tracing_open(struct inode *inode, struct file *file) if (ring_buffer_overruns(iter->tr->buffer)) iter->iter_flags |= TRACE_FILE_ANNOTATE; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + /* stop the trace while dumping */ tracing_stop(); @@ -3339,6 +3344,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) if (trace_flags & TRACE_ITER_LATENCY_FMT) iter->iter_flags |= TRACE_FILE_LAT_FMT; + /* Output in nanoseconds only if we are using a clock in nanoseconds. */ + if (trace_clocks[trace_clock_id].in_ns) + iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + iter->cpu_file = cpu_file; iter->tr = &global_trace; mutex_init(&iter->mutex); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55010ed175f..c75d7988902 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -406,10 +406,6 @@ void tracing_stop_sched_switch_record(void); void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); int is_tracing_stopped(void); -enum trace_file_type { - TRACE_FILE_LAT_FMT = 1, - TRACE_FILE_ANNOTATE = 2, -}; extern cpumask_var_t __read_mostly tracing_buffer_mask; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 123b189c732..194d79602dc 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -610,24 +610,54 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) return trace_print_lat_fmt(s, entry); } -static unsigned long preempt_mark_thresh = 100; +static unsigned long preempt_mark_thresh_us = 100; static int -lat_print_timestamp(struct trace_seq 
*s, u64 abs_usecs, - unsigned long rel_usecs) +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) { - return trace_seq_printf(s, " %4lldus%c: ", abs_usecs, - rel_usecs > preempt_mark_thresh ? '!' : - rel_usecs > 1 ? '+' : ' '); + unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE; + unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS; + unsigned long long abs_ts = iter->ts - iter->tr->time_start; + unsigned long long rel_ts = next_ts - iter->ts; + struct trace_seq *s = &iter->seq; + + if (in_ns) { + abs_ts = ns2usecs(abs_ts); + rel_ts = ns2usecs(rel_ts); + } + + if (verbose && in_ns) { + unsigned long abs_usec = do_div(abs_ts, USEC_PER_MSEC); + unsigned long abs_msec = (unsigned long)abs_ts; + unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); + unsigned long rel_msec = (unsigned long)rel_ts; + + return trace_seq_printf( + s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", + ns2usecs(iter->ts), + abs_msec, abs_usec, + rel_msec, rel_usec); + } else if (verbose && !in_ns) { + return trace_seq_printf( + s, "[%016llx] %lld (+%lld): ", + iter->ts, abs_ts, rel_ts); + } else if (!verbose && in_ns) { + return trace_seq_printf( + s, " %4lldus%c: ", + abs_ts, + rel_ts > preempt_mark_thresh_us ? '!' : + rel_ts > 1 ? 
'+' : ' '); + } else { /* !verbose && !in_ns */ + return trace_seq_printf(s, " %4lld: ", abs_ts); + } } int trace_print_context(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent; - unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, USEC_PER_SEC); - unsigned long secs = (unsigned long)t; + unsigned long long t; + unsigned long secs, usec_rem; char comm[TASK_COMM_LEN]; int ret; @@ -644,8 +674,13 @@ int trace_print_context(struct trace_iterator *iter) return 0; } - return trace_seq_printf(s, " %5lu.%06lu: ", - secs, usec_rem); + if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { + t = ns2usecs(iter->ts); + usec_rem = do_div(t, USEC_PER_SEC); + secs = (unsigned long)t; + return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); + } else + return trace_seq_printf(s, " %12llu: ", iter->ts); } int trace_print_lat_context(struct trace_iterator *iter) @@ -659,36 +694,29 @@ int trace_print_lat_context(struct trace_iterator *iter) *next_entry = trace_find_next_entry(iter, NULL, &next_ts); unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); - unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); - unsigned long rel_usecs; /* Restore the original ent_size */ iter->ent_size = ent_size; if (!next_entry) next_ts = iter->ts; - rel_usecs = ns2usecs(next_ts - iter->ts); if (verbose) { char comm[TASK_COMM_LEN]; trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" - " %ld.%03ldms (+%ld.%03ldms): ", comm, - entry->pid, iter->cpu, entry->flags, - entry->preempt_count, iter->idx, - ns2usecs(iter->ts), - abs_usecs / USEC_PER_MSEC, - abs_usecs % USEC_PER_MSEC, - rel_usecs / USEC_PER_MSEC, - rel_usecs % USEC_PER_MSEC); + ret = trace_seq_printf( + s, "%16s %5d %3d %d %08x %08lx ", + comm, entry->pid, iter->cpu, entry->flags, + entry->preempt_count, iter->idx); } else { ret = lat_print_generic(s, entry, iter->cpu); - if (ret) - ret = 
lat_print_timestamp(s, abs_usecs, rel_usecs); } + if (ret) + ret = lat_print_timestamp(iter, next_ts); + return ret; } -- cgit v1.2.3-70-g09d2 From 1c7d66732458dc187008e3f5b2f71e019e320fc2 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sat, 3 Nov 2012 12:38:33 +0800 Subject: tracing: Kill unused and puzzled sample code in ftrace.h When doing per-cpu helper optimizing work, find that this code is so puzzled. 1. It's mark as comment text, maybe a sample function for guidelines or a todo work. 2. But, this sample code is odd where struct perf_trace_buf is nonexistent. commit ce71b9 delete struct perf_trace_buf definition. Author: Frederic Weisbecker Date: Sun Nov 22 05:26:55 2009 +0100 tracing: Use the perf recursion protection from trace event Is it necessary to keep there? just compile test. Link: http://lkml.kernel.org/r/50949FC9.6050202@gmail.com Signed-off-by: Shan Wei Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 73 -------------------------------------------------- 1 file changed, 73 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 698f2a89032..40dc5e8fe34 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -619,79 +619,6 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Define the insertion callback to perf events - * - * The job is very similar to ftrace_raw_event_ except that we don't - * insert in the ring buffer but in a perf counter. 
- * - * static void ftrace_perf_(proto) - * { - * struct ftrace_data_offsets_ __maybe_unused __data_offsets; - * struct ftrace_event_call *event_call = &event_; - * extern void perf_tp_event(int, u64, u64, void *, int); - * struct ftrace_raw_##call *entry; - * struct perf_trace_buf *trace_buf; - * u64 __addr = 0, __count = 1; - * unsigned long irq_flags; - * struct trace_entry *ent; - * int __entry_size; - * int __data_size; - * int __cpu - * int pc; - * - * pc = preempt_count(); - * - * __data_size = ftrace_get_offsets_(&__data_offsets, args); - * - * // Below we want to get the aligned size by taking into account - * // the u32 field that will later store the buffer size - * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), - * sizeof(u64)); - * __entry_size -= sizeof(u32); - * - * // Protect the non nmi buffer - * // This also protects the rcu read side - * local_irq_save(irq_flags); - * __cpu = smp_processor_id(); - * - * if (in_nmi()) - * trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); - * else - * trace_buf = rcu_dereference_sched(perf_trace_buf); - * - * if (!trace_buf) - * goto end; - * - * trace_buf = per_cpu_ptr(trace_buf, __cpu); - * - * // Avoid recursion from perf that could mess up the buffer - * if (trace_buf->recursion++) - * goto end_recursion; - * - * raw_data = trace_buf->buf; - * - * // Make recursion update visible before entering perf_tp_event - * // so that we protect from perf recursions. 
- * - * barrier(); - * - * //zero dead bytes from alignment to avoid stack leak to userspace: - * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; - * entry = (struct ftrace_raw_ *)raw_data; - * ent = &entry->ent; - * tracing_generic_entry_update(ent, irq_flags, pc); - * ent->type = event_call->id; - * - * <- do some jobs with dynamic arrays - * - * <- affect our values - * - * perf_tp_event(event_call->id, __addr, __count, entry, - * __entry_size); <- submit them to perf counter - * - * } - */ #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3-70-g09d2 From 32cdba1e05418909708a17e52505e8b2ba4381d1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 14 Nov 2012 19:03:42 +0100 Subject: uprobes: Use percpu_rw_semaphore to fix register/unregister vs dup_mmap() race This was always racy, but 268720903f87e0b84b161626c4447b81671b5d18 "uprobes: Rework register_for_each_vma() to make it O(n)" should be blamed anyway, it made everything worse and I didn't notice. register/unregister call build_map_info() and then do install/remove breakpoint for every mm which mmaps inode/offset. This can obviously race with fork()->dup_mmap() in between and we can miss the child. uprobe_register() could be easily fixed but unregister is much worse, the new mm inherits "int3" from parent and there is no way to detect this if uprobe goes away. So this patch simply adds percpu_down_read/up_read around dup_mmap(), and percpu_down_write/up_write into register_for_each_vma(). This adds 2 new hooks into dup_mmap() but we can kill uprobe_dup_mmap() and fold it into uprobe_end_dup_mmap(). 
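Reported-by: Srikar Dronamraju Acked-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 8 ++++++++ kernel/events/uprobes.c | 26 +++++++++++++++++++++++--- kernel/fork.c | 2 ++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 2615c4d7788..4f628a6fc5b 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -97,6 +97,8 @@ extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_con extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_start_dup_mmap(void); +extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); @@ -127,6 +129,12 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void uprobe_start_dup_mmap(void) +{ +} +static inline void uprobe_end_dup_mmap(void) +{ +} static inline void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5ce99cfd2e6..dea7acfbb07 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -33,6 +33,7 @@ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ +#include <linux/percpu-rwsem.h> #include <linux/uprobes.h> @@ -71,6 +72,8 @@ static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) +static struct percpu_rw_semaphore dup_mmap_sem; + /* * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe * events active at this time. Probably a fine grained per inode count is @@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) struct map_info *info; int err = 0; + percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); - if (IS_ERR(info)) - return PTR_ERR(info); + if (IS_ERR(info)) { + err = PTR_ERR(info); + goto out; + } while (info) { struct mm_struct *mm = info->mm; @@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) mmput(mm); info = free_map_info(info); } - + out: + percpu_up_write(&dup_mmap_sem); return err; } @@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } +void uprobe_start_dup_mmap(void) +{ + percpu_down_read(&dup_mmap_sem); +} + +void uprobe_end_dup_mmap(void) +{ + percpu_up_read(&dup_mmap_sem); +} + void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { newmm->uprobes_state.xol_area = NULL; @@ -1597,6 +1614,9 @@ static int __init init_uprobes(void) mutex_init(&uprobes_mmap_mutex[i]); } + if (percpu_init_rwsem(&dup_mmap_sem)) + return -ENOMEM; + return register_die_notifier(&uprobe_exception_nb); } module_init(init_uprobes); diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa..c497e57aa65 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) unsigned long charge; struct mempolicy *pol; + uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); uprobe_dup_mmap(oldmm, mm); @@ -469,6 +470,7 @@ out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); up_write(&oldmm->mmap_sem); + uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: mpol_put(pol); -- cgit v1.2.3-70-g09d2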
skip the uprobe_mmap if there are no uprobe * events active at this time. Probably a fine grained per inode count is @@ -766,10 +769,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) struct map_info *info; int err = 0; + percpu_down_write(&dup_mmap_sem); info = build_map_info(uprobe->inode->i_mapping, uprobe->offset, is_register); - if (IS_ERR(info)) - return PTR_ERR(info); + if (IS_ERR(info)) { + err = PTR_ERR(info); + goto out; + } while (info) { struct mm_struct *mm = info->mm; @@ -799,7 +805,8 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) mmput(mm); info = free_map_info(info); } - + out: + percpu_up_write(&dup_mmap_sem); return err; } @@ -1131,6 +1138,16 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } +void uprobe_start_dup_mmap(void) +{ + percpu_down_read(&dup_mmap_sem); +} + +void uprobe_end_dup_mmap(void) +{ + percpu_up_read(&dup_mmap_sem); +} + void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { newmm->uprobes_state.xol_area = NULL; @@ -1597,6 +1614,9 @@ static int __init init_uprobes(void) mutex_init(&uprobes_mmap_mutex[i]); } + if (percpu_init_rwsem(&dup_mmap_sem)) + return -ENOMEM; + return register_die_notifier(&uprobe_exception_nb); } module_init(init_uprobes); diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa..c497e57aa65 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -352,6 +352,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) unsigned long charge; struct mempolicy *pol; + uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); uprobe_dup_mmap(oldmm, mm); @@ -469,6 +470,7 @@ out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); up_write(&oldmm->mmap_sem); + uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: mpol_put(pol); -- cgit v1.2.3-70-g09d2