From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:53 -0400 Subject: tracing: Ftrace dynamic ftrace_event_call support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic ftrace_event_call support to ftrace. Trace engines can add new ftrace_event_call to ftrace on the fly. Each operator function of the call takes an ftrace_event_call data structure as an argument, because these functions may be shared among several ftrace_event_calls. Changes from v13: - Define remove_subsystem_dir() always (revirt a2ca5e03), because trace_remove_event_call() uses it. - Modify syscall tracer because of ftrace_event_call change. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Przemysław Pawełczyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 16 ++++++++-------- include/trace/syscall.h | 11 +++++++---- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e..f2bd7a8f8e8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * event_trace_printk(_RET_IP_, ": " ); * } * - * static int ftrace_reg_event_(void) + * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * 
return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_event_); * } @@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_(void) + * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_raw_event_); * } @@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(void) + * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) * { * int id; * @@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae..e290b86f616 100644 --- a/include/trace/syscall.h +++ 
b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.3-70-g09d2 From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 01:34:24 +0200 Subject: tracing/bkl: Add bkl ftrace events Add two events lock_kernel and unlock_kernel() to trace the bkl uses. This opens the door for userspace tools to perform statistics about the callsites that use it, dependencies with other locks (by pairing the trace with lock events), use with recursivity and so on... The {__reacquire,release}_kernel_lock() events are not traced because these are called from schedule, thus the sched events are sufficient to trace them. 
Example of a trace: hald-addon-stor-4152 [000] 165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put() hald-addon-stor-4152 [000] 167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get() How to get the callsites that acquire it recursively: cd /debug/tracing/events/bkl echo "lock_depth > 0" > filter firefox-4951 [001] 206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode() You can also filter by file and/or line. v2: Use of FILTER_PTR_STRING attribute for files and lines fields to make them traceable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan --- include/linux/smp_lock.h | 19 ++++++++++++--- include/trace/events/bkl.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ lib/kernel_lock.c | 11 +++++---- 3 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 include/trace/events/bkl.h (limited to 'include/trace') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59bf34..d48cc77ba70 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,6 +3,7 @@ #ifdef CONFIG_LOCK_KERNEL #include +#include #define kernel_locked() (current->lock_depth >= 0) @@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); +extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); + +#define lock_kernel() { \ + trace_lock_kernel(__func__, __FILE__, __LINE__); \ + _lock_kernel(); \ +} + +#define unlock_kernel() { \ + trace_unlock_kernel(__func__, __FILE__, __LINE__); \ + _unlock_kernel(); \ +} /* * Various legacy drivers don't really need the BKL in a specific @@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() do { } while(0) -#define unlock_kernel() do { } while(0) 
+#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); +#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h new file mode 100644 index 00000000000..8abd620a490 --- /dev/null +++ b/include/trace/events/bkl.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bkl + +#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BKL_H + +#include + +TRACE_EVENT(lock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field( int, lock_depth ) + __field_ext( const char *, func, FILTER_PTR_STRING ) + __field_ext( const char *, file, FILTER_PTR_STRING ) + __field( int, line ) + ), + + TP_fast_assign( + /* We want to record the lock_depth after lock is acquired */ + __entry->lock_depth = current->lock_depth + 1; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +TRACE_EVENT(unlock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field(int, lock_depth) + __field(const char *, func) + __field(const char *, file) + __field(int, line) + ), + + TP_fast_assign( + __entry->lock_depth = current->lock_depth; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +#endif /* _TRACE_BKL_H */ + +/* This part must be outside protection */ +#include diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029e352..5c10b2e1fd0 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,11 @@ * 
relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include #include #include #include +#define CREATE_TRACE_POINTS +#include /* * The 'big kernel lock' @@ -113,7 +114,7 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(void) { int depth = current->lock_depth+1; if (likely(!depth)) @@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void) current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(void) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); -- cgit v1.2.3-70-g09d2 From b0da3f0dada78832c9da03ad2152ae76bd9a2496 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 1 Oct 2009 21:16:13 +0200 Subject: Add a tracepoint for block request remapping Since 2.6.31 now has request-based device-mapper, it's useful to have a tracepoint for request-remapping as well as bio-remapping. This patch adds a tracepoint for request-remapping, trace_block_rq_remap(). 
Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Cc: Li Zefan Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blktrace_api.h | 2 +- include/trace/events/block.h | 33 +++++++++++++++++++++++++++++++++ kernel/trace/blktrace.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/block/blk-core.c b/block/blk-core.c index 34504f30972..ddaaea4fdff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include "blk.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 622939a2329..3b73b9992b2 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) 
do { } while (0) -# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d86af94691c..00405b5f624 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -488,6 +488,39 @@ TRACE_EVENT(block_remap, (unsigned long long)__entry->old_sector) ); +TRACE_EVENT(block_rq_remap, + + TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, + sector_t from), + + TP_ARGS(q, rq, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(rq->rq_disk); + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = blk_rq_sectors(rq); + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs_rq(__entry->rwbs, rq); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 60b5c5a3d4b..d9d6206e0b1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -855,6 +855,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, sizeof(r), &r); } +/** + * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @q: queue the io is for + * @rq: the source request + * @dev: target device + * @from: source sector + * + * Description: + * Device mapper remaps request to other devices. + * Add a trace for that action. 
+ * + **/ +static void blk_add_trace_rq_remap(struct request_queue *q, + struct request *rq, dev_t dev, + sector_t from) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.sector_from = cpu_to_be64(from); + + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, + sizeof(r), &r); +} + /** * blk_add_driver_data - Add binary message with driver-specific data * @q: queue the io is for @@ -922,10 +953,13 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + WARN_ON(ret); } static void blk_unregister_tracepoints(void) { + unregister_trace_block_rq_remap(blk_add_trace_rq_remap); unregister_trace_block_remap(blk_add_trace_remap); unregister_trace_block_split(blk_add_trace_split); unregister_trace_block_unplug_io(blk_add_trace_unplug_io); -- cgit v1.2.3-70-g09d2 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. 
Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 15 +++++++++------ kernel/trace/ring_buffer.c | 15 +++++++++------ kernel/trace/trace_events.c | 24 ++++++++++++------------ kernel/trace/trace_export.c | 25 ++++++++++++++----------- kernel/trace/trace_syscalls.c | 20 +++++++++++++------- tools/perf/util/trace-event-parse.c | 24 ++++++++++++++++++++++++ tools/perf/util/trace-event.h | 1 + 7 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e18..c9bbcab95fb 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,9 +120,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +133,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned 
int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff0197054..e43c928356e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d128f65778e..cf3cabf6ce1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -507,7 +507,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? 
__bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc61bc..31da218ee10 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __field_desc #define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + 
sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __array_desc #define __array_desc(type, container, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 527e17eae57..d99abc427c3 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -103,7 +103,8 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? 
\ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { @@ -120,7 +121,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) if (!entry) return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +132,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,8 +167,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), SYSCALL_FIELD(long, ret)); if (!ret) @@ -212,7 +218,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 55b41b9e383..be8412d699a 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -894,6 +894,21 @@ static int event_read_fields(struct event *event, struct format_field 
**fields) field->size = strtoul(token, NULL, 0); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) goto fail_expect; @@ -2843,6 +2858,15 @@ static void parse_header_field(char *type, return; *size = atoi(token); free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) return; if (read_expect_type(EVENT_NEWLINE, &token) < 0) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 162c3e6deb9..00b440df66d 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -26,6 +26,7 @@ enum { enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, }; struct format_field { -- cgit v1.2.3-70-g09d2 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. 
- The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in all, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might choose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there are in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrarily deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM. - An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over the system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. 
New tracepoints can be added as the hardware side evolves. There's various parsers that can be used. - Lots of scheduling/buffering/batching modes of operandi for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step. Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++++ include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 include/trace/events/mce.h (limited to 'include/trace') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d..39caea3d8bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -141,6 +144,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 00000000000..7eee77895cb --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define 
TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3-70-g09d2 From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 06:50:42 +0200 Subject: tracing: Move syscalls metadata handling from arch to core Most of the syscalls metadata processing is done from arch. But these operations are mostly generic accross archs. 
Especially now that we have a common variable name that expresses the number of syscalls supported by an arch: NR_syscalls, the only remaining bits that need to reside in arch is the syscall nr to addr translation. v2: Compare syscalls symbols only after the "sys" prefix so that we avoid spurious mismatches with archs that have syscalls wrappers, in which case syscalls symbols have "SyS" prefixed aliases. (Reported by: Heiko Carstens) Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt --- arch/s390/kernel/ftrace.c | 67 +-------------------------------- arch/x86/kernel/ftrace.c | 76 +------------------------------------- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 86 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 140 deletions(-) (limited to 'include/trace') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 57bdcb1e3cd..7c5752c3423 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -206,73 +206,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - 
syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 25e6f5fc4b1..5a1b9758fd6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -470,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct 
syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283ba5ae..e972f0a40f8 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -33,7 +33,7 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9fbce6c9d2e..8bda4bff228 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -14,6 +14,69 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, 
NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = 
find_syscall_meta(addr); + syscalls_metadata[i] = meta; + } + + return 0; +} +core_initcall(init_ftrace_syscalls); + #ifdef CONFIG_EVENT_PROFILE static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -- cgit v1.2.3-70-g09d2 From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 11:50:39 +0200 Subject: events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. 
This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/bkl.h | 18 +++++----- include/trace/events/irq.h | 8 ++--- include/trace/events/power.h | 2 -- include/trace/events/sched.h | 44 ++++++++++++------------ include/trace/events/timer.h | 79 ++++++++++++++++++++++---------------------- 5 files changed, 74 insertions(+), 77 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h index 8abd620a490..1af72dc2427 100644 --- a/include/trace/events/bkl.h +++ b/include/trace/events/bkl.h @@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field( int, lock_depth ) + __field( int, depth ) __field_ext( const char *, func, FILTER_PTR_STRING ) __field_ext( const char *, file, FILTER_PTR_STRING ) __field( int, line ) @@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel, TP_fast_assign( /* We want to record the lock_depth after lock is acquired */ - __entry->lock_depth = current->lock_depth + 1; + __entry->depth = current->lock_depth + 1; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); @@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field(int, lock_depth) - __field(const char *, func) - __field(const char *, file) - __field(int, line) + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) ), TP_fast_assign( - __entry->lock_depth = 
current->lock_depth; + __entry->depth = current->lock_depth; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db4a40..dcfcd440762 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? "handled" : "unhandled") ); @@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); @@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579261a..9bb96e5a284 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,8 +16,6 @@ enum { }; #endif - - TRACE_EVENT(power_start, TP_PROTO(unsigned int type, unsigned int state), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43f418..b50b9856c59 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ 
TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? 
__print_flags(__entry->prev_state, "|", @@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); @@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig = sig; ), - TP_printk("sig: %d task %s:%d", + TP_printk("sig=%d comm=%s pid=%d", __entry->sig, __entry->comm, __entry->pid) ); @@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) @@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep, __perf_count(delay); ), - 
TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait, __perf_count(delay); ), - TP_printk("task: %s:%d iowait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48d640..e5ce87a0498 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer = 
timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + 
TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); -- cgit v1.2.3-70-g09d2 From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Nov 2009 04:13:05 +0100 Subject: tracing, perf_events: Protect the buffer from recursion in perf While tracing using events with perf, if one enables the 
lockdep:lock_acquire event, it will infect all other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock. 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu buffer and then submit the buffer to perf. 4) Perf detects a recursion and ignores it. 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, so it has been turned into a lock_acquire event of rcu read lock. Such a scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent other possible kinds of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 +++++-- include/trace/ftrace.h | 39 ++++++++++++++++++++++------- kernel/trace/trace_event_profile.c | 41 ++++++++++++++----------------- kernel/trace/trace_kprobe.c | 50 ++++++++++++++++++++++++++++++++------ kernel/trace/trace_syscalls.c | 44 +++++++++++++++++++++++++++------ 5 files changed, 133 insertions(+), 50 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f7b47c33670..43360c1d8f7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a7f94609412..4945d1c9986 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct trace_entry *ent; @@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * 
raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ @@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -761,6 +780,8 @@ static void 
ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ +end_recursion: \ + trace_buf->recursion--; \ end: \ local_irq_restore(irq_flags); \ \ diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index c9f687ab0d4..e0d351b01f5 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,41 +8,36 @@ #include #include "trace.h" -/* - * We can't use a size but a type in alloc_percpu() - * So let's create a dummy type that matches the desired size - */ -typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; -char *trace_profile_buf; -EXPORT_SYMBOL_GPL(trace_profile_buf); +struct perf_trace_buf *perf_trace_buf; +EXPORT_SYMBOL_GPL(perf_trace_buf); -char *trace_profile_buf_nmi; -EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); +struct perf_trace_buf *perf_trace_buf_nmi; +EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - char *buf; + struct perf_trace_buf *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf; - rcu_assign_pointer(trace_profile_buf, buf); + rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf_nmi, buf); } ret = event->profile_enable(event); @@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - free_percpu(trace_profile_buf_nmi); - free_percpu(trace_profile_buf); - trace_profile_buf_nmi = NULL; - trace_profile_buf = NULL; + 
free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; } fail_buf: atomic_dec(&event->profile_count); @@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - char *buf, *nmi_buf; + struct perf_trace_buf *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; @@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) event->profile_disable(event); if (!--total_profile_count) { - buf = trace_profile_buf; - rcu_assign_pointer(trace_profile_buf, NULL); + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); - nmi_buf = trace_profile_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, NULL); + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); /* * Ensure every events in profiling have finished before diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cf17a6694f3..3696476f307 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * 
Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kprobe_trace_entry *)raw_data; @@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } @@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kretprobe_trace_entry *)raw_data; @@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 58b8e537076..51213b0aa81 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,6 +477,7 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; + struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; char *raw_data; @@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } @@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; char *raw_data; @@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 10:06:34 +0100 Subject: tracing: Rename 'lockdep' event subsystem into 'lock' Lockdep events subsystem gathers various locking related events such as a request, release, contention or acquisition of a lock. 
The name of this event subsystem is a bit of a misnomer since these events are not quite related to lockdep but more generally to locking, ie: these events are not reporting lock dependencies or possible deadlock scenario but pure locking events. Hence this rename. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Acked-by: Hitoshi Mitake Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/events/lock.h | 96 ++++++++++++++++++++++++++++++++++++++++++ include/trace/events/lockdep.h | 96 ------------------------------------------ kernel/lockdep.c | 2 +- 3 files changed, 97 insertions(+), 97 deletions(-) create mode 100644 include/trace/events/lock.h delete mode 100644 include/trace/events/lockdep.h (limited to 'include/trace') diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h new file mode 100644 index 00000000000..a870ba125aa --- /dev/null +++ b/include/trace/events/lock.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include +#include + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? 
"read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h deleted file mode 100644 index bcf1d209a00..00000000000 --- a/include/trace/events/lockdep.h +++ /dev/null @@ -1,96 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#ifdef CONFIG_LOCKDEP - -TRACE_EVENT(lock_acquire, - - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - - TP_STRUCT__entry( - __field(unsigned int, flags) - __string(name, lock->name) - ), - - TP_fast_assign( 
- __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); - __assign_str(name, lock->name); - ), - - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", - (__entry->flags & 2) ? "read " : "", - __get_str(name)) -); - -TRACE_EVENT(lock_release, - - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - - TP_ARGS(lock, nested, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, - - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af56723c09..f5dcd36d315 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; -- cgit v1.2.3-70-g09d2 From 811cb50baf63461ce0bdb234927046131fc7fa8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Nov 2009 23:40:09 +0100 Subject: tracing: Fix event format export For some reason the export of the event print format to userspace uses '#fmt' which 
breaks if the format string is anything but a plain string, for example if it is built with macros then the macro names are exported instead of their contents. Use "\"%s\"", fmt instead of "%s", #fmt to export the string and not the way it is built. For example, in net/mac80211/driver-trace.h for the trace event drv_start there is: TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) Which used to produce: print fmt: LOCAL_PR_FMT, REC->wiphy_name Now produces: print fmt: "%s", REC->wiphy_name Signed-off-by: Johannes Berg LKML-Reference: <20091113224009.GB23942@elte.hu> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d9667e18..dacb8ef6700 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -159,7 +159,7 @@ #undef __get_str #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) +#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) 
args -- cgit v1.2.3-70-g09d2 From beac2da7565e42be59963824899825d0cc624295 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:25:08 -0500 Subject: ext4: add tracepoint for ext4_forget() Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 1 + include/trace/events/ext4.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/trace') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 554c6798597..13de1dd751f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -89,6 +89,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, might_sleep(); + trace_ext4_forget(inode, is_metadata, blocknr); BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f9..b390e1fc4a7 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -907,6 +907,32 @@ TRACE_EVENT(ext4_mballoc_free, __entry->result_len, __entry->result_logical) ); +TRACE_EVENT(ext4_forget, + TP_PROTO(struct inode *inode, int is_metadata, __u64 block), + + TP_ARGS(inode, is_metadata, block), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( umode_t, mode ) + __field( int, is_metadata ) + __field( __u64, block ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->is_metadata = is_metadata; + __entry->block = block; + ), + + TP_printk("dev %s ino %lu mode %d is_metadata %d block %llu", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, __entry->is_metadata, __entry->block) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3-70-g09d2 From 192dcf1d1775736627280a5dd4cb0f605b21857a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 18 Nov 2009 13:06:55 -0800 Subject: tracing: Remove the stale include/trace/power.h Commit 6161352 
moved the power tracing to include/trace/events/, but left the old header behind. No one is using the old header, and its declarations are now incorrect, so it should be removed. Signed-off-by: Josh Stone Acked-by: Arjan van de Ven Cc: Frank Ch. Eigler Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258578415-14752-1-git-send-email-jistone@redhat.com> Signed-off-by: Ingo Molnar --- include/trace/power.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/trace/power.h (limited to 'include/trace') diff --git a/include/trace/power.h b/include/trace/power.h deleted file mode 100644 index ef204666e98..00000000000 --- a/include/trace/power.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _TRACE_POWER_H -#define _TRACE_POWER_H - -#include -#include - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { - ktime_t stamp; - ktime_t end; - int type; - int state; -}; - -DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); - -#endif /* _TRACE_POWER_H */ -- cgit v1.2.3-70-g09d2 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. 
Then export the recursion protection from perf and use it from the trace events before submitting a trace. v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 9 ++--- include/linux/perf_event.h | 4 +++ include/trace/ftrace.h | 23 +++++++------ kernel/perf_event.c | 68 +++++++++++++++++++++++++------------- kernel/trace/trace_event_profile.c | 14 ++++---- kernel/trace/trace_kprobe.c | 48 ++++++++++----------------- kernel/trace/trace_syscalls.c | 47 ++++++++++---------------- 7 files changed, 106 insertions(+), 107 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1d8f7..47bbdf9c38d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,13 +137,8 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -struct perf_trace_buf { - char buf[FTRACE_MAX_PROFILE_SIZE]; - int recursion; -}; - -extern struct perf_trace_buf *perf_trace_buf; -extern struct perf_trace_buf *perf_trace_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f7264..74e98b1d339 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int 
*recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c9986..c222ef5238b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + extern int perf_swevent_get_recursion_context(int **recursion); \ + extern void perf_swevent_put_recursion_context(int *recursion); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ - struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ + int *recursion; \ int __cpu; \ int pc; \ \ @@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + if (perf_swevent_get_recursion_context(&recursion)) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - trace_buf = per_cpu_ptr(trace_buf, __cpu); \ - if (trace_buf->recursion++) \ - goto end_recursion; \ - \ - barrier(); \ - \ - raw_data = trace_buf->buf; \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ 
entry = (struct ftrace_raw_##call *)raw_data; \ @@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end_recursion: \ - trace_buf->recursion--; \ -end: \ +end: \ + perf_swevent_put_recursion_context(recursion); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 718fa939b1a..aba82272230 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +/* + * Must be called with preemption disabled + */ +int perf_swevent_get_recursion_context(int **recursion) { + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (in_nmi()) - return &cpuctx->recursion[3]; + *recursion = &cpuctx->recursion[3]; + else if (in_irq()) + *recursion = &cpuctx->recursion[2]; + else if (in_softirq()) + *recursion = &cpuctx->recursion[1]; + else + *recursion = &cpuctx->recursion[0]; - if (in_irq()) - return &cpuctx->recursion[2]; + if (**recursion) + return -1; - if (in_softirq()) - return &cpuctx->recursion[1]; + (**recursion)++; - return &cpuctx->recursion[0]; + return 0; } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +void perf_swevent_put_recursion_context(int *recursion) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swevent_recursion_context(cpuctx); - struct perf_event_context *ctx; - - if (*recursion) - goto out; + (*recursion)--; +} - (*recursion)++; - barrier(); +static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event_context *ctx; + struct perf_cpu_context *cpuctx = 
&__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, @@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); +} - barrier(); - (*recursion)--; +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + int *recursion; + + preempt_disable(); + + if (perf_swevent_get_recursion_context(&recursion)) + goto out; + + __do_perf_sw_event(type, event_id, nr, nmi, data, regs); + perf_swevent_put_recursion_context(recursion); out: - put_cpu_var(perf_cpu_context); + preempt_enable(); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + /* Trace events already protected against recursion */ + __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e0d351b01f5..d9c60f80aa0 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -9,31 +9,33 @@ #include "trace.h" -struct perf_trace_buf *perf_trace_buf; +char *perf_trace_buf; EXPORT_SYMBOL_GPL(perf_trace_buf); -struct perf_trace_buf *perf_trace_buf_nmi; +char *perf_trace_buf_nmi; EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; + /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf; + char *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf 
= alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; rcu_assign_pointer(perf_trace_buf, buf); - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf_nmi; @@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf, *nmi_buf; + char *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3696476f307..22e6f68b05b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; char *raw_data; + int *recursion; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. 
- */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; @@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; + int *recursion; char *raw_data; pc = preempt_count(); @@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. 
- */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 51213b0aa81..0bb93487526 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,10 +477,11 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; - struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; + char *trace_buf; char *raw_data; + int *recursion; int syscall_nr; int size; int cpu; @@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. 
- */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } @@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; + char *trace_buf; char *raw_data; + int *recursion; int size; int cpu; @@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. 
- */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From e6362609b6c71c5b802026be9cf263bbdd67a50e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:17:05 -0500 Subject: ext4: call ext4_forget() from ext4_free_blocks() Add the facility for ext4_forget() to be called from ext4_free_blocks(). This simplifies the code in a large number of places, and centralizes most of the work of calling ext4_forget() into a single place. Also fix a bug in the extents migration code; it wasn't calling ext4_forget() when releasing the indirect blocks during the conversion. As a result, if the system crashed during or shortly after the extents migration, and the released indirect blocks get reused as data blocks, the journal replay would corrupt the data blocks. With this new patch, fixing this bug was as simple as adding the EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks(). 
Signed-off-by: "Theodore Ts'o" Cc: "Aneesh Kumar K.V" --- fs/ext4/ext4.h | 10 +++++-- fs/ext4/extents.c | 24 ++++++---------- fs/ext4/inode.c | 67 ++++++++++++++++++--------------------------- fs/ext4/mballoc.c | 49 ++++++++++++++++++++++++--------- fs/ext4/migrate.c | 23 +++++++++++----- fs/ext4/xattr.c | 8 ++++-- include/trace/events/ext4.h | 16 ++++++----- 7 files changed, 109 insertions(+), 88 deletions(-) (limited to 'include/trace') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 210e1b53e91..4cfc2f0edb3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -375,6 +375,12 @@ struct ext4_new_group_data { #define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ EXT4_GET_BLOCKS_DIO_CREATE_EXT) +/* + * Flags used by ext4_free_blocks + */ +#define EXT4_FREE_BLOCKS_METADATA 0x0001 +#define EXT4_FREE_BLOCKS_FORGET 0x0002 + /* * ioctl commands */ @@ -1384,8 +1390,8 @@ extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata); + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74dcff84c3a..2c4a9321fb1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1007,7 +1007,8 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, ablocks[i], 1, 1); + ext4_free_blocks(handle, inode, 0, ablocks[i], 1, + EXT4_FREE_BLOCKS_METADATA); } } kfree(ablocks); @@ -1957,7 +1958,6 @@ errout: static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { - struct buffer_head *bh; int err; ext4_fsblk_t leaf; @@ -1973,9 +1973,8 @@ 
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); - bh = sb_find_get_block(inode->i_sb, leaf); - ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1, 1); + ext4_free_blocks(handle, inode, 0, leaf, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return err; } @@ -2042,12 +2041,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_lblk_t to) { - struct buffer_head *bh; unsigned short ee_len = ext4_ext_get_actual_len(ex); - int i, metadata = 0; + int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; + flags |= EXT4_FREE_BLOCKS_METADATA; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2072,11 +2070,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext_pblock(ex) + ee_len - num; ext_debug("free last %u blocks starting %llu\n", num, start); - for (i = 0; i < num; i++) { - bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, metadata, inode, bh, start + i); - } - ext4_free_blocks(handle, inode, start, num, metadata); + ext4_free_blocks(handle, inode, 0, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", @@ -3319,8 +3313,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), + ext4_ext_get_actual_len(&newex), 0); goto out2; } diff --git a/fs/ext4/inode.c 
b/fs/ext4/inode.c index 72c69432349..3b28e1fbfc9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -669,7 +669,7 @@ allocated: return ret; failed_out: for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); return ret; } @@ -765,20 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, return err; failed: /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); /* - * Note: is_metadata is 0 because branch[i].bh is - * newly allocated, so there is no need to revoke the - * block. If we do, it's harmless, but not necessary. + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. */ - ext4_forget(handle, 0, inode, branch[i].bh, - branch[i].bh->b_blocknr); + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); } - for (i = 0; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); - ext4_free_blocks(handle, inode, new_blocks[i], num, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); return err; } @@ -857,18 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); /* - * Note: is_metadata is 0 because branch[i].bh is - * newly allocated, so there is no need to revoke the - * block. If we do, it's harmless, but not necessary. + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. 
*/ - ext4_forget(handle, 0, inode, where[i].bh, - where[i].bh->b_blocknr); - ext4_free_blocks(handle, inode, - le32_to_cpu(where[i-1].key), 1, 0); + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); } - ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), + blks, 0); return err; } @@ -4080,7 +4078,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; - int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode); + int flags = EXT4_FREE_BLOCKS_FORGET; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; if (try_to_extend_transaction(handle, inode)) { if (bh) { @@ -4096,27 +4097,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, } } - /* - * Any buffers which are on the journal will be in memory. We - * find them on the hash table so jbd2_journal_revoke() will - * run jbd2_journal_forget() on them. We've already detached - * each block from the file, so bforget() in - * jbd2_journal_forget() should be safe. - * - * AKPM: turn on bforget in jbd2_journal_forget()!!! 
- */ - for (p = first; p < last; p++) { - u32 nr = le32_to_cpu(*p); - if (nr) { - struct buffer_head *tbh; - - *p = 0; - tbh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, is_metadata, inode, tbh, nr); - } - } + for (p = first; p < last; p++) + *p = 0; - ext4_free_blocks(handle, inode, block_to_free, count, is_metadata); + ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); } /** @@ -4304,7 +4288,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, blocks_for_truncate(inode)); } - ext4_free_blocks(handle, inode, nr, 1, 1); + ext4_free_blocks(handle, inode, 0, nr, 1, + EXT4_FREE_BLOCKS_METADATA); if (parent_bh) { /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0dca90be1af..78de5d3c5dc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4436,8 +4436,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * @metadata: Are these metadata blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata) + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -4454,15 +4454,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, int err = 0; int ret; - /* - * We need to make sure we don't reuse the freed block until - * after the transaction is committed, which we can do by - * treating the block as metadata, below. We make an - * exception if the inode is to be written in writeback mode - * since writeback mode has weak data consistency guarantees. 
- */ - if (!ext4_should_writeback_data(inode)) - metadata = 1; + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; @@ -4476,7 +4473,32 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, metadata); + trace_ext4_free_blocks(inode, block, count, flags); + + if (flags & EXT4_FREE_BLOCKS_FORGET) { + struct buffer_head *tbh = bh; + int i; + + BUG_ON(bh && (count > 1)); + + for (i = 0; i < count; i++) { + if (!bh) + tbh = sb_find_get_block(inode->i_sb, + block + i); + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, tbh, block + i); + } + } + + /* + * We need to make sure we don't reuse the freed block until + * after the transaction is committed, which we can do by + * treating the block as metadata, below. We make an + * exception if the inode is to be written in writeback mode + * since writeback mode has weak data consistency guarantees. + */ + if (!ext4_should_writeback_data(inode)) + flags |= EXT4_FREE_BLOCKS_METADATA; ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { @@ -4552,7 +4574,8 @@ do_more: err = ext4_mb_load_buddy(sb, block_group, &e4b); if (err) goto error_return; - if (metadata && ext4_handle_valid(handle)) { + + if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) { struct ext4_free_data *new_entry; /* * blocks being freed are metadata. 
these blocks shouldn't diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a93d5b80f3e..d641e13e740 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle, for (i = 0; i < max_entries; i++) { if (tmp_idata[i]) { extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, - le32_to_cpu(tmp_idata[i]), 1, 1); + ext4_free_blocks(handle, inode, 0, + le32_to_cpu(tmp_idata[i]), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); return 0; } @@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle, } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); return 0; } @@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) /* ei->i_data[EXT4_IND_BLOCK] */ if (i_data[0]) { extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, - le32_to_cpu(i_data[0]), 1, 1); + ext4_free_blocks(handle, inode, 0, + le32_to_cpu(i_data[0]), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } /* ei->i_data[EXT4_DIND_BLOCK] */ @@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, block, 1, 1); + ext4_free_blocks(handle, inode, 0, block, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return retval; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 025701926f9..910bf9a59cb 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t 
*handle, struct inode *inode, ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce); - ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); get_bh(bh); - ext4_forget(handle, 1, inode, bh, bh->b_blocknr); + ext4_free_blocks(handle, inode, bh, 0, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); error = ext4_handle_dirty_metadata(handle, inode, bh); @@ -832,7 +833,8 @@ inserted: new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext4_free_blocks(handle, inode, block, 1, 1); + ext4_free_blocks(handle, inode, 0, block, 1, + EXT4_FREE_BLOCKS_METADATA); error = -EIO; goto cleanup; } diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b390e1fc4a7..74f628bfdd1 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -650,30 +650,32 @@ TRACE_EVENT(ext4_allocate_blocks, TRACE_EVENT(ext4_free_blocks, TP_PROTO(struct inode *inode, __u64 block, unsigned long count, - int metadata), + int flags), - TP_ARGS(inode, block, count, metadata), + TP_ARGS(inode, block, count, flags), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) + __field( umode_t, mode ) __field( __u64, block ) __field( unsigned long, count ) - __field( int, metadata ) - + __field( int, flags ) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; __entry->block = block; __entry->count = count; - __entry->metadata = metadata; + __entry->flags = flags; ), - TP_printk("dev %s ino %lu block %llu count %lu metadata %d", + TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->block, __entry->count, __entry->metadata) + __entry->mode, __entry->block, __entry->count, + __entry->flags) ); TRACE_EVENT(ext4_sync_file, -- cgit v1.2.3-70-g09d2 From 6eebee625544ac4ef1d805da942f463275bd6caa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o 
Date: Sun, 22 Nov 2009 20:23:31 -0500 Subject: ext4: print i_mode in octal in ext4 tracepoints Inode permissions are much easier to understand if they are printed in octal. Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 74f628bfdd1..287347ca71b 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -38,7 +38,7 @@ TRACE_EVENT(ext4_free_inode, __entry->blocks = inode->i_blocks; ), - TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", + TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->uid, __entry->gid, (unsigned long long) __entry->blocks) @@ -61,7 +61,7 @@ TRACE_EVENT(ext4_request_inode, __entry->mode = mode; ), - TP_printk("dev %s dir %lu mode %d", + TP_printk("dev %s dir %lu mode 0%o", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, __entry->mode) ); @@ -85,7 +85,7 @@ TRACE_EVENT(ext4_allocate_inode, __entry->mode = mode; ), - TP_printk("dev %s ino %lu dir %lu mode %d", + TP_printk("dev %s ino %lu dir %lu mode 0%o", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); @@ -930,7 +930,7 @@ TRACE_EVENT(ext4_forget, __entry->block = block; ), - TP_printk("dev %s ino %lu mode %d is_metadata %d block %llu", + TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->is_metadata, __entry->block) ); -- cgit v1.2.3-70-g09d2 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. 
This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 8 ++--- include/trace/ftrace.h | 17 ++++++----- kernel/perf_event.c | 71 +++++++++++++++++++------------------------ kernel/trace/trace_kprobe.c | 14 +++++---- kernel/trace/trace_syscalls.c | 14 +++++---- 5 files changed, 61 insertions(+), 63 deletions(-) (limited to 'include/trace') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1d339..43adbd7f001 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c222ef5238b..c3417c13e3e 100644 --- 
a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(int **recursion); \ - extern void perf_swevent_put_recursion_context(int *recursion); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ @@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto) \ int __data_size; \ char *trace_buf; \ char *raw_data; \ - int *recursion; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto) \ \ local_irq_save(irq_flags); \ \ - if (perf_swevent_get_recursion_context(&recursion)) \ - goto end_recursion; \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ \ __cpu = smp_processor_id(); \ \ @@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(recursion); \ -end_recursion: \ +end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 50f11b5f8c3..0b0d5f72fe7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -/* - * Must be called with preemption disabled - */ -int perf_swevent_get_recursion_context(int **recursion) +int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = 
&get_cpu_var(perf_cpu_context); + int rctx; if (in_nmi()) - *recursion = &cpuctx->recursion[3]; + rctx = 3; else if (in_irq()) - *recursion = &cpuctx->recursion[2]; + rctx = 2; else if (in_softirq()) - *recursion = &cpuctx->recursion[1]; + rctx = 1; else - *recursion = &cpuctx->recursion[0]; + rctx = 0; - if (**recursion) + if (cpuctx->recursion[rctx]) { + put_cpu_var(perf_cpu_context); return -1; + } - (**recursion)++; + cpuctx->recursion[rctx]++; + barrier(); - return 0; + return rctx; } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); -void perf_swevent_put_recursion_context(int *recursion) +void perf_swevent_put_recursion_context(int rctx) { - (*recursion)--; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + barrier(); + cpuctx->recursion[rctx]++; + put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); @@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, rcu_read_unlock(); } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - int *recursion; - - preempt_disable(); - - if (perf_swevent_get_recursion_context(&recursion)) - goto out; - - __do_perf_sw_event(type, event_id, nr, nmi, data, regs); - - perf_swevent_put_recursion_context(recursion); -out: - preempt_enable(); -} - void __perf_sw_event(u32 event_id, u64 
nr, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; + int rctx; + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + return; data.addr = addr; data.raw = NULL; do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + + perf_swevent_put_recursion_context(rctx); } static void perf_swevent_read(struct perf_event *event) @@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, regs = task_pt_regs(current); /* Trace events already protected against recursion */ - __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 22e6f68b05b..79ce6a2bd74 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, unsigned long irq_flags; char *trace_buf; char *raw_data; - int *recursion; + int rctx; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, perf_tp_event(call->id, entry->ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); @@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, int size, __size, i, pc, __cpu; unsigned long irq_flags; char *trace_buf; - int *recursion; char *raw_data; + int rctx; pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); @@ -1293,7 
+1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, perf_tp_event(call->id, entry->ret_ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 41b6dd963da..9189cbe8607 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) unsigned long flags; char *trace_buf; char *raw_data; - int *recursion; int syscall_nr; + int rctx; int size; int cpu; @@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) perf_tp_event(sys_data->enter_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } @@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; char *trace_buf; char *raw_data; - int *recursion; + int rctx; int size; int cpu; @@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = 
perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } -- cgit v1.2.3-70-g09d2 From b4d7241596ffb6398ac5535ae8cf80d845b0c254 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 24 Nov 2009 11:15:08 -0500 Subject: ext4: remove encountered_congestion trace It is no longer set and scheduled to be removed. Signed-off-by: Wu Fengguang Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 287347ca71b..f4c62d33077 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -310,7 +310,6 @@ TRACE_EVENT(ext4_da_writepages_result, __field( int, ret ) __field( int, pages_written ) __field( long, pages_skipped ) - __field( char, encountered_congestion ) __field( char, more_io ) __field( char, no_nrwrite_index_update ) __field( pgoff_t, writeback_index ) @@ -322,17 +321,16 @@ TRACE_EVENT(ext4_da_writepages_result, __entry->ret = ret; __entry->pages_written = pages_written; __entry->pages_skipped = wbc->pages_skipped; - __entry->encountered_congestion = wbc->encountered_congestion; __entry->more_io = wbc->more_io; __entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update; __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d writeback_index %lu", + TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d no_nrwrite_index_update %d writeback_index %lu", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, 
__entry->ret, __entry->pages_written, __entry->pages_skipped, - __entry->encountered_congestion, __entry->more_io, + __entry->more_io, __entry->no_nrwrite_index_update, (unsigned long) __entry->writeback_index) ); -- cgit v1.2.3-70-g09d2 From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:27:27 -0500 Subject: tracing: Create new TRACE_EVENT_TEMPLATE There are some places in the kernel that define several tracepoints and they are all identical besides the name. The code to enable, disable and record is created for every trace point even if most of the code is identical. This patch adds TRACE_EVENT_TEMPLATE that lets the developer create a template TRACE_EVENT and create trace points with DEFINE_EVENT, which is based off of a given template. Each trace point used by this will share most of the code, and bring down the size of the kernel when there are several duplicate events. Usage is: TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print); Which would be the same as defining a normal TRACE_EVENT. To create the trace events that the trace points will use: DEFINE_EVENT(template, name, proto, args) is done. The template is the name of the TRACE_EVENT_TEMPLATE to use. The name is the name of the trace point. The parameters proto and args must be the same as the proto and args of the template. If they are not the same, then a compile error will result. I tried hard to remove this duplication but the C preprocessor is not powerful enough (or my CPP magic experience points are not at a high enough level) to not need them. A lot of trace events are coming in with new XFS development. Most of the trace points are identical except for the name.
The following shows the advantage of having TRACE_EVENT_TEMPLATE: $ size fs/xfs/xfs.o.* text data bss dec hex filename 452114 2788 3520 458422 6feb6 fs/xfs/xfs.o.old 638482 38116 3744 680342 a6196 fs/xfs/xfs.o.template 996954 38116 4480 1039550 fdcbe fs/xfs/xfs.o.trace xfs.o.old is without any tracepoints. xfs.o.template uses the new TRACE_EVENT_TEMPLATE. xfs.o.trace uses the current TRACE_EVENT macros. Requested-by: Christoph Hellwig Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 4 ++ include/trace/define_trace.h | 6 ++ include/trace/ftrace.h | 149 +++++++++++++++++++++++++++++++------------ 3 files changed, 117 insertions(+), 42 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a83e89..88a5b5a809e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. 
*/ +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf7403..244985814a4 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +67,8 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_TEMPLATE +#undef DEFINE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c3417c13e3e..2969f65d800 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include +/* + * TRACE_EVENT_TEMPLATE can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. + * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + TRACE_EVENT_TEMPLATE(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,13 +56,15 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) 
args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name #undef __cpparg @@ -89,12 +111,15 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -170,8 +195,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) 
-#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ - if (entry->type != event_##call.id) { \ + if (entry->type != event_id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ } \ @@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + #include 
TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int 
ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - \ -static struct ftrace_event_call event_##call; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + 
ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ @@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ extern int perf_swevent_get_recursion_context(void); \ extern void perf_swevent_put_recursion_context(int rctx); \ - struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ @@ -789,6 +845,15 @@ end_recursion: \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.3-70-g09d2 From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:36:26 -0500 Subject: tracing: Create new DEFINE_EVENT_PRINT After creating the 
TRACE_EVENT_TEMPLATE I started to look at other trace points to see what duplication was made. I noticed that there are several trace points that are almost identical except for the name and the output format. Since TRACE_EVENT_TEMPLATE was successful in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT. DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the developer to override the print format. If there are two or more TRACE_EVENTS that are identical except for the name and print, then they can be converted to use a TRACE_EVENT_TEMPLATE. Since the TRACE_EVENT_TEMPLATE already does the print output, the first trace event would have its print format held in the TRACE_EVENT_TEMPLATE and be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT and override the print format. I converted the sched trace points to both DEFINE_EVENT and DEFINE_EVENT_PRINT: five were converted to DEFINE_EVENT and two were converted to DEFINE_EVENT_PRINT. I was able to get the following: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is the scheduler compiled with no trace points. sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT. sched.o-trace is the current trace events.
Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 2 + include/trace/define_trace.h | 5 ++ include/trace/ftrace.h | 123 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 126 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88a5b5a809e..7063383cca1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 244985814a4..5d7d855ae21 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -35,6 +35,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -69,6 +73,7 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_TEMPLATE #undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2969f65d800..b0461772bc8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -67,6 +67,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 
+ #undef __cpparg #define __cpparg(arg...) arg @@ -120,6 +124,10 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -198,15 +206,28 @@ #undef TRACE_EVENT_TEMPLATE #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: " print); \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } @@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ #name, iter, flags); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = 
&iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + 
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto) \ ftrace_profile_templ_##template(event_call, args); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v1.2.3-70-g09d2 From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:48:08 -0500 Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT Converting some of the scheduler trace events to use the TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to save some space: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 
kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is without any tracepoints compiled sched.o-templ is with this patch sched.o-trace is the tracepoints before this patch The trace events converted to DEFINE_EVENT: sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit, and sched_stat_wait. The trace events converted to DEFINE_EVENT_PRINT: sched_stat_sleep and sched_stat_iowait. Note, since the TRACE_EVENT_TEMPLATE always uses a print, the sched_stat_wait print format is defined in the template and this template is used by sched_stat_sleep and sched_stat_iowait. But the latter two override the print format. Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 170 +++++++++++++------------------------------ 1 file changed, 52 insertions(+), 118 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b50b9856c59..238f74b5848 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +TRACE_EVENT_TEMPLATE(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup, __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin.
) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, target_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->target_cpu = task_cpu(p); - ), - - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +TRACE_EVENT_TEMPLATE(sched_process_template, TP_PROTO(struct task_struct *p), @@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free, ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -348,12 +316,7 @@ 
TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +TRACE_EVENT_TEMPLATE(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). 
- */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.2.3-70-g09d2 From b8007ef7422270864eae523cb38d7522a53a94d3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Nov 2009 13:45:32 +0800 Subject: tracing: Separate raw syscall from syscall tracer The current syscall tracer mixes raw syscalls and real syscalls. echo 1 > events/syscalls/enable And we get these from the output: (XXXX insteads " grep-20914 [001] 588211.446347" .. etc) XXXX: sys_read(fd: 3, buf: 80609a8, count: 7000) XXXX: sys_enter: NR 3 (3, 80609a8, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 XXXX: sys_read(fd: 3, buf: 8060ae0, count: 7000) XXXX: sys_enter: NR 3 (3, 8060ae0, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 There are 2 drawbacks here. 
A) two almost identical records are saved in ringbuffer when a syscall enters or exits. (4 records for every syscall) This wastes precious space in the ring buffer. B) the lines including "sys_enter/sys_exit" produce hardly any useful information for the output (no labels). The user can use this method to prevent these drawbacks: echo 1 > events/syscalls/enable echo 0 > events/syscalls/sys_enter/enable echo 0 > events/syscalls/sys_exit/enable But this is not user friendly. So we separate raw syscall from syscall tracer. After this fix is applied: syscall tracer's output (echo 1 > events/syscalls/enable): XXXX: sys_read(fd: 3, buf: bfe87d88, count: 200) XXXX: sys_read -> 0x200 XXXX: sys_fstat64(fd: 3, statbuf: bfe87c98) XXXX: sys_fstat64 -> 0x0 XXXX: sys_close(fd: 3) raw syscall tracer's output (echo 1 > events/raw_syscalls/enable): XXXX: sys_enter: NR 175 (0, bf92bf18, bf92bf98, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 175 (2, bf92bf98, 0, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 3 (9, bf927f9c, 4000, b77e2518, b77dce60, bf92bff8) Signed-off-by: Lai Jiangshan LKML-Reference: <4AEFC37C.5080609@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/syscalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 397dff2dbd5..fb726ac7cae 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM syscalls +#define TRACE_SYSTEM raw_syscalls +#define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EVENTS_SYSCALLS_H -- cgit v1.2.3-70-g09d2 From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 09:04:55 +0100 Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS() It
is not quite obvious at first sight what TRACE_EVENT_TEMPLATE does: does it define an event as well beyond defining a template? To clarify this, rename it to DECLARE_EVENT_CLASS, which follows the various 'DECLARE_*()' idioms we already have in the kernel: DECLARE_EVENT_CLASS(class) DEFINE_EVENT(class, event1) DEFINE_EVENT(class, event2) DEFINE_EVENT(class, event3) To complete this logic we should also rename TRACE_EVENT() to: DEFINE_SINGLE_EVENT(single_event) ... but in a more quiet moment of the kernel cycle. Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/sched.h | 6 +++--- include/trace/ftrace.h | 46 ++++++++++++++++++++++---------------------- 4 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7063383cca1..f59604ed0ec 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. 
*/ -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5d7d855ae21..5acfb1eb4df 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -71,7 +71,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_TEMPLATE +#undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 238f74b5848..5ce79502185 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT_TEMPLATE(sched_wakeup_template, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -TRACE_EVENT_TEMPLATE(sched_process_template, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. 
*/ -TRACE_EVENT_TEMPLATE(sched_stat_template, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b0461772bc8..2c9c073e45a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -19,17 +19,17 @@ #include /* - * TRACE_EVENT_TEMPLATE can be used to add a generic function + * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same * parameters and just have distinct trace points. * Each tracepoint can be defined with DEFINE_EVENT and that - * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * will map the DECLARE_EVENT_CLASS to the tracepoint. * * TRACE_EVENT is a one to one mapping between tracepoint and template. */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - TRACE_EVENT_TEMPLATE(name, \ + DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ @@ -56,8 +56,8 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ @@ -115,8 +115,8 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -203,8 +203,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) 
-#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_setup_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ ftrace_raw_output_id_##call(int event_id, const char *name, \ struct trace_iterator *iter, int flags) \ @@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __string #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef 
DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ @@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ proto) \ @@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static void \ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ -- cgit v1.2.3-70-g09d2 From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:03:23 +0800 Subject: tracing: Convert module refcnt events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 29854 1980 128 31962 7cda kernel/module.o.old 28750 1980 128 30858 788a kernel/module.o Two events are converted: module_refcnt: module_get, module_put No change in functionality. 
Signed-off-by: Li Zefan Cc: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/module.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb1847..4b0f48ba16a 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, -- cgit v1.2.3-70-g09d2 From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:10 +0800 Subject: tracing: Convert some kmem events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 333987 69800 27228 431015 693a7 mm/built-in.o.old 330030 69800 27228 427058 68432 mm/built-in.o 8 events are converted: kmem_alloc: kmalloc, kmem_cache_alloc kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node kmem_free: kfree, kmem_cache_free mm_page: 
mm_page_alloc_zone_locked, mm_page_pcpu_drain No change in functionality. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/kmem.h | 130 ++++++++++++++------------------------------ mm/page_alloc.c | 4 +- mm/util.c | 3 - 3 files changed, 43 insertions(+), 94 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bdd18a..3adca0ca9db 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - 
__entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void 
*ptr), - TP_ARGS(call_site, ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41..bdb22f55d00 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,12 +48,14 @@ #include #include #include -#include #include #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* * Array of node states. 
*/ diff --git a/mm/util.c b/mm/util.c index 7c35ad95f92..15d197571b4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,9 +6,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v1.2.3-70-g09d2 From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:31 +0800 Subject: tracing: Convert softirq events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 12781 952 36 13769 35c9 kernel/softirq.o.old 11981 952 32 12965 32a5 kernel/softirq.o Two events are converted: softirq: softirq_entry, softirq_exit No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/irq.h | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index dcfcd440762..0e4cfb694fe 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. 
- */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -111,6 +100,24 @@ TRACE_EVENT(softirq_entry, show_softirq_name(__entry->vec)) ); +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + /** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("vec=%d [action=%s]", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ -- cgit v1.2.3-70-g09d2 From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:03 +0800 Subject: tracing: Convert some workqueue events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 13171 800 72 14043 36db kernel/workqueue.o.old 12243 800 68 13111 3337 kernel/workqueue.o Two events are converted: workqueue: workqueue_insertion, workqueue_execution No change in functionality. 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/workqueue.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612dbd7ba..d6c974474e7 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ -- cgit v1.2.3-70-g09d2 From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:38 +0800 Subject: tracing: Convert some power events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 4312 524 12 4848 12f0 kernel/trace/power-traces.o.old 3455 524 8 3987 f93 kernel/trace/power-traces.o Two events are converted: power: power_start, power_frequency No change in 
functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9bb96e5a284..c4efe9b8280 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,7 +16,7 @@ enum { }; #endif -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -35,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ -- cgit v1.2.3-70-g09d2 From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:14 +0800 Subject: tracing: Convert some block events to DEFINE_EVENT use 
DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 53570 3284 184 57038 dece block/blk-core.o.old 43702 3284 144 47130 b81a block/blk-core.o 12 events are converted: block_rq: block_rq_insert, block_rq_issue block_rq_with_error: block_rq_{abort, requeue, complete} block_bio: block_bio_{backmerge, frontmerge, queue} block_get_rq: block_getrq, block_sleeprq block_unplug: block_unplug_timer, block_unplug_io No change in functionality. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/block.h | 202 +++++++++---------------------------------- 1 file changed, 42 insertions(+), 160 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5f624..5fb72733331 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? 
blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 
0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - 
__entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? 
bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, -- cgit v1.2.3-70-g09d2 From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:55 +0800 Subject: tracing: Convert some jbd2 events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 34903 1693 448 37044 90b4 fs/jbd2/journal.o.old 31931 1693 416 34040 84f8 fs/jbd2/journal.o Four events are converted: jbd2_commit: jbd2_start_commit, jbd2_commit_{locking, flushing, logging} No change in functionality. 
Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/jbd2.h | 63 ++++++++------------------------------------- 1 file changed, 11 insertions(+), 52 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75adb9..96b370a050d 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - 
jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, -- cgit v1.2.3-70-g09d2 From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:07:36 +0800 Subject: tracing: Convert some ext4 events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 294695 6104 340 301139 49853 fs/ext4/ext4.o.old 289983 6104 324 296411 485db fs/ext4/ext4.o 5 events are converted: ext4__write_begin: ext4_write_begin, ext4_da_write_begin ext4__write_end: ext4_{ordered, writeback, journalled}_write_end No change in functionality.
Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/ext4.h | 129 ++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 94 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550bf3f9..318f76535bd 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = 
inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu 
len %u flags %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.3-70-g09d2 From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:08:01 +0800 Subject: tracing: Restore original format of sched events The original format for sched_stat_iowait and sched_stat_sleep: $ cat events/sched/sched_stat_iowait/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... But commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 ("tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT") broke the format: $ cat events/sched/sched_stat_iowait/format print fmt: "task: %s:%d iowait: %Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format print fmt: "task: %s:%d sleep: %Lu [ns]", ... No change in functionality. 
Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5ce79502185..9d316b22388 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing -- cgit v1.2.3-70-g09d2 From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:45 -0500 Subject: tracepoint: Move signal sending tracepoint to events/signal.h Move signal sending event to events/signal.h. This patch also renames sched_signal_send event to signal_generate. Changes in v4: - Fix a typo of task_struct pointer. 
Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Add siginfo storing macro Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/tracepoint.tmpl | 5 +++ include/trace/events/sched.h | 25 ------------- include/trace/events/signal.h | 66 +++++++++++++++++++++++++++++++++++ kernel/signal.c | 5 +-- 4 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 include/trace/events/signal.h (limited to 'include/trace') diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd57..8bca1d5cec0 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h + + SIGNAL +!Iinclude/trace/events/signal.h + + diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9d316b22388..cfceb0b73e2 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -287,31 +287,6 @@ TRACE_EVENT(sched_process_fork, __entry->child_comm, __entry->child_pid) ); -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig=%d comm=%s pid=%d", - __entry->sig, __entry->comm, __entry->pid) -); - /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 00000000000..ef51756a801 --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,66 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include +#include +#include + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. 
+ */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/signal.c b/kernel/signal.c index 6705320784f..a1e0cc6b32c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,8 @@ #include #include #include -#include +#define CREATE_TRACE_POINTS +#include #include #include @@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigqueue *q; int override_rlimit; - trace_sched_signal_send(sig, t); + trace_signal_generate(sig, info, t); assert_spin_locked(&t->sighand->siglock); -- cgit v1.2.3-70-g09d2 From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:51 -0500 Subject: tracepoint: Add signal deliver event Add a tracepoint where a process gets a signal. This tracepoint shows signal-number, sa-handler and sa-flag. 
Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Fix comment Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 39 +++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index ef51756a801..a6d71de0dc0 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate, __entry->comm, __entry->pid) ); +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. 
+ */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index a1e0cc6b32c..349d4493740 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1840,6 +1840,9 @@ relock: ka = &sighand->action[signr-1]; } + /* Trace actually delivered signals. */ + trace_signal_deliver(signr, info, ka); + if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { -- cgit v1.2.3-70-g09d2 From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:58 -0500 Subject: tracepoint: Add signal loss events Add signal_overflow_fail and signal_lose_info tracepoints for signal-lost events. 
Changes in v3: - Add docbook style comments Changes in v2: - Use siginfo string macro Suggested-by: Roland McGrath Reviewed-by: Jason Baron Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/trace/events/signal.h | 68 +++++++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 19 ++++++++---- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a6d71de0dc0..a510b75ac30 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver, __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. 
+ */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index 349d4493740..93e72e5feae 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -897,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; } } else if (!is_si_special(info)) { - if (sig >= SIGRTMIN && info->si_code != SI_USER) - /* - * Queue overflow, abort. We may abort if the signal was rt - * and sent by user using something other than kill(). - */ + if (sig >= SIGRTMIN && info->si_code != SI_USER) { + /* + * Queue overflow, abort. 
We may abort if the + * signal was rt and sent by user using something + * other than kill(). + */ + trace_signal_overflow_fail(sig, group, info); return -EAGAIN; + } else { + /* + * This is a silent loss of information. We still + * send the signal, but the *info bits are lost. + */ + trace_signal_lose_info(sig, group, info); + } } out_set: -- cgit v1.2.3-70-g09d2 From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:20 +0800 Subject: trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit fix event_enter_##sname->event fix event_exit_##sname->event remove unused event_syscall_enter and event_syscall_exit Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D278.4090209@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 2 -- kernel/trace/trace_syscalls.c | 8 -------- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b50974a93af..2f7c539ab96 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ + .event = &enter_syscall_print_##sname, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ @@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ + .event = &exit_syscall_print_##sname, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ diff --git 
a/include/trace/syscall.h b/include/trace/syscall.h index 51ee17d3632..5f8827c92db 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 63aa8070365..00d6e176f5b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -struct trace_event event_syscall_enter = { - .trace = print_syscall_enter, -}; - -struct trace_event event_syscall_exit = { - .trace = print_syscall_exit, -}; - int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.2.3-70-g09d2 From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:30 +0800 Subject: trace_syscalls: Set event_enter_##sname->data to its metadata Set event_enter_##sname->data to its metadata, which makes the code simpler. 
Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D282.7050709@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 6 ++++-- include/trace/syscall.h | 2 +- kernel/trace/trace_syscalls.c | 36 +++++++++++------------------------- 3 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f7c539ab96..d3c9fd01a11 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ @@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ @@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5f8827c92db..c5265c81c4e 100644 --- 
a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,7 +34,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(char *name); +extern int syscall_name_to_nr(const char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 00d6e176f5b..39649b1675d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(char *name) +int syscall_name_to_nr(const char *name) { int i; @@ -172,18 +172,11 @@ extern char *__bad_type_size(void); int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; - int nr; int ret; - struct syscall_metadata *entry; + struct syscall_metadata *entry = call->data; struct syscall_trace_enter trace; int offset = offsetof(struct syscall_trace_enter, args); - nr = syscall_name_to_nr(call->data); - entry = syscall_nr_to_meta(nr); - - if (!entry) - return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); @@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; - struct syscall_metadata *meta; + struct syscall_metadata *meta = call->data; int ret; - int nr; int i; int offset = offsetof(typeof(trace), args); - nr = syscall_name_to_nr(call->data); - meta = syscall_nr_to_meta(nr); - - if (!meta) - return 0; - ret = trace_define_common_fields(call); if (ret) return ret; @@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct 
syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; @@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; -- cgit v1.2.3-70-g09d2 From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:36 +0800 Subject: trace_syscalls: Remove enter_id exit_id use ->enter_event->id instead of ->enter_id use ->exit_event->id instead of ->exit_id Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D288.7030001@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 2 -- include/trace/syscall.h | 6 ------ kernel/trace/trace_syscalls.c | 30 ++++++++++-------------------- 3 files changed, 10 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3c9fd01a11..b9af87560ad 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -168,7 +168,6 @@ static void 
prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ return 0; \ } \ @@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ return 0; \ } \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c5265c81c4e..ca09561cd57 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -15,8 +15,6 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ @@ -25,8 +23,6 @@ struct syscall_metadata { int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; @@ -35,8 +31,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(const char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 39649b1675d..27eb18d6922 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name) return -1; } -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - enum 
print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_id != ent->type) { + if (entry->enter_event->id != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_id != ent->type) { + if (entry->exit_event->id != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, - size, 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->enter_event->id, size, 0, 0); if (!event) return; @@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, - sizeof(*entry), 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->exit_event->id, sizeof(*entry), 0, 0); if (!event) return; @@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; + rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); @@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_id; + rec->ent.type = sys_data->exit_event->id; rec->nr = 
syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); -- cgit v1.2.3-70-g09d2 From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:47 +0800 Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata Add syscall_nr field to struct syscall_metadata, which makes it easier to get the syscall number. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D293.6090800@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 4 ++-- include/trace/syscall.h | 3 ++- kernel/trace/trace_syscalls.c | 22 +++++++++------------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b9af87560ad..3c280d7ecb7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&enter_syscall_print_##sname);\ @@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&exit_syscall_print_##sname);\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index ca09561cd57..1531eef3071 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,6 +12,7 @@ * A syscall entry 
in the ftrace syscalls array. * * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) @@ -20,6 +21,7 @@ */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; @@ -30,7 +32,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(const char *name); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 27eb18d6922..144cc14d855 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(const char *name) +static int syscall_name_to_nr(const char *name) { int i; @@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); 
+ num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void) for (i = 0; i < NR_syscalls; i++) { addr = arch_syscall_addr(i); meta = find_syscall_meta(addr); + if (!meta) + continue; + + meta->syscall_nr = i; syscalls_metadata[i] = meta; } -- cgit v1.2.3-70-g09d2 From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:55 +0800 Subject: trace_syscalls: Remove duplicate init_enter_##sname() use only one init_syscall_trace instead of many init_enter_##sname()/init_exit_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 30 ++---------------------------- include/trace/syscall.h | 1 + kernel/trace/trace_syscalls.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3c280d7ecb7..cf0d923ea40 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return 
-ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_enter_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_exit_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 1531eef3071..dff9371e527 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,6 +32,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long 
arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 144cc14d855..c6514093c95 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } +int init_syscall_trace(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + return 0; +} + int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v1.2.3-70-g09d2 From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:01 +0800 Subject: trace_syscalls: Simplify syscall profile use only one prof_sysenter_enable() instead of prof_sysenter_enable_##sname() use only one prof_sysenter_disable() instead of prof_sysenter_disable_##sname() use only one prof_sysexit_enable() instead of prof_sysexit_enable_##sname() use only one prof_sysexit_disable() instead of prof_sysexit_disable_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 31 ++++--------------------------- include/trace/syscall.h | 8 ++++---- kernel/trace/trace_syscalls.c | 24 ++++++++---------------- 3 files changed, 16 insertions(+), 47 deletions(-) (limited to 'include/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf0d923ea40..c2df3a59323 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) 
__SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) 
\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dff9371e527..961fda3556b 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6514093c95..1e85b6cc26a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -520,14 +520,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_enter(char *name) +int prof_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) @@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name) return ret; } -void unreg_prof_syscall_enter(char *name) +void prof_sysenter_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_enter--; @@ -625,14 +621,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_exit(char *name) +int prof_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || 
num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) @@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name) return ret; } -void unreg_prof_syscall_exit(char *name) +void prof_sysexit_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_exit--; -- cgit v1.2.3-70-g09d2 From e9c0748b687aa70179a9e6d8ffc24b2874fe350b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 13:23:19 +0100 Subject: itimer: Fix the itimer trace print format Compiling powerpc64 results in: include/trace/events/timer.h:279: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'cputime_t' .... cputime_t on power is u64, which triggers the above warning. Cast the cputime_t to unsigned long long and fix the print format string. That works on both 32 and 64 bit architectures. While at it change the print format for long variables from %lu to %ld. 
Signed-off-by: Thomas Gleixner Cc: Xiao Guangrong --- include/trace/events/timer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index e5ce87a0498..9496b965d62 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -301,8 +301,8 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", - __entry->which, __entry->expires, + TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", + __entry->which, (unsigned long long)__entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) ); @@ -331,8 +331,8 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which=%d pid=%d now=%lu", __entry->which, - (int) __entry->pid, __entry->now) + TP_printk("which=%d pid=%d now=%llu", __entry->which, + (int) __entry->pid, (unsigned long long)__entry->now) ); #endif /* _TRACE_TIMER_H */ -- cgit v1.2.3-70-g09d2 From 4819568f23a8bef0ca99b740ca60fe2450ab0aac Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 12 Dec 2009 13:06:13 -0800 Subject: ftrace.h: Use common pr_info fmt string Reduces fmt string space a bit. 
Signed-off-by: Joe Perches Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1260651974.2637.4.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index d1b3de9c1a7..c4eca380204 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -733,7 +733,7 @@ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ ret = register_trace_##call(ftrace_raw_event_##call); \ if (ret) \ pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ + "probe to %s\n", #call); \ return ret; \ } \ \ -- cgit v1.2.3-70-g09d2 From 87d9b4e1c52867a45331a9a5495f6448e0c68b23 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:20 +0800 Subject: tracing: Extract duplicate ftrace_raw_init_event_foo() Use a generic trace_event_raw_init() function for all event's raw_init callbacks (but kprobes) instead of defining the same version for each of these. This shrinks the kernel code: text data bss dec hex filename 5355293 1961928 7103260 14420481 dc0a01 vmlinux.o.old 5346802 1961864 7103260 14411926 dbe896 vmlinux.o raw_init can't be removed, because ftrace events and kprobe events use different raw_init callbacks. Though it's possible to totally remove raw_init, I choose to leave it as it is for now. 
Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Ingo Molnar LKML-Reference: <4B1DC48C.7080603@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 + include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 35 ++++------------------------------- kernel/trace/trace_events.c | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 33 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 38f8d655383..ea44b891109 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -158,6 +158,7 @@ enum { FILTER_PTR_STRING, }; +extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bc70c5810fe..94ac28437be 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -145,7 +145,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -167,7 +167,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_syscall_trace, \ + .raw_init = trace_event_raw_init, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c4eca380204..6055b0604c8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -623,23 +623,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call 
*unused)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) - * { - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; - * } - * * static struct ftrace_event_call __used * __attribute__((__aligned__(4))) * __attribute__((section("_ftrace_events"))) event_ = { * .name = "", * .system = "", - * .raw_init = ftrace_raw_init_event_, + * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, * .show_format = ftrace_format_, @@ -647,9 +636,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args - #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ @@ -744,19 +730,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ \ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} +}; #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ @@ -776,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##template, \ @@ -793,7 +767,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ .event = &ftrace_event_type_##call, \ - 
.raw_init = ftrace_raw_init_event_##call, \ + .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ @@ -953,7 +927,6 @@ end: \ perf_swevent_put_recursion_context(rctx); \ end_recursion: \ local_irq_restore(irq_flags); \ - \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d18315dc83..8ed66e0d476 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -105,6 +105,20 @@ void trace_destroy_fields(struct ftrace_event_call *call) } } +int trace_event_raw_init(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + + return 0; +} +EXPORT_SYMBOL_GPL(trace_event_raw_init); + static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { -- cgit v1.2.3-70-g09d2 From 614a71a26ba3d97e9fa85649db69a682b78e407d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:36 +0800 Subject: tracing: Pull up calls to trace_define_common_fields() Call trace_define_common_fields() in event_create_dir() only. 
This avoids trace events to handle it from their define_fields callbacks and shrinks the kernel code size: text data bss dec hex filename 5346802 1961864 7103260 14411926 dbe896 vmlinux.o.old 5345151 1961864 7103260 14410275 dbe223 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Ingo Molnar Cc: Jason Baron Cc: Masami Hiramatsu LKML-Reference: <4B1DC49C.8000107@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 1 - include/trace/ftrace.h | 4 ---- kernel/trace/trace_events.c | 7 ++++--- kernel/trace/trace_export.c | 4 ---- kernel/trace/trace_kprobe.c | 8 -------- kernel/trace/trace_syscalls.c | 8 -------- 6 files changed, 4 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ea44b891109..db97c64ce0e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -159,7 +159,6 @@ enum { }; extern int trace_event_raw_init(struct ftrace_event_call *call); -extern int trace_define_common_fields(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 6055b0604c8..2af2f7a2c1b 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8ed66e0d476..97b0b3aa166 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); if (ret) \ return ret; -int trace_define_common_fields(struct ftrace_event_call *call) +static int 
trace_define_common_fields(struct ftrace_event_call *call) { int ret; struct trace_entry ent; @@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call) return ret; } -EXPORT_SYMBOL_GPL(trace_define_common_fields); void trace_destroy_fields(struct ftrace_event_call *call) { @@ -927,7 +926,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, id); if (call->define_fields) { - ret = call->define_fields(call); + ret = trace_define_common_fields(call); + if (!ret) + ret = call->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index dff8c84ddf1..458e5bfe26d 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ struct struct_name field; \ int ret; \ \ - ret = trace_define_common_fields(event_call); \ - if (ret) \ - return ret; \ - \ tstruct; \ \ return ret; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aff5f80b59b..e3c80e92589 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1113,10 +1113,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) struct kprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ @@ -1131,10 +1127,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) struct kretprobe_trace_entry field; struct trace_probe *tp = (struct trace_probe *)event_call->data; - ret = trace_define_common_fields(event_call); - if (!ret) - return ret; - DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned 
long, ret_ip, FIELD_STRING_RETIP, 0); DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 57501d90096..b957edd0ca3 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) int i; int offset = offsetof(typeof(trace), args); - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; @@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) struct syscall_trace_exit trace; int ret; - ret = trace_define_common_fields(call); - if (ret) - return ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From 3b8e4273814a7f9e9a74ece517d9206fea919aaa Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:14:52 +0800 Subject: tracing: Move a printk out of ftrace_raw_reg_event_foo() Move the printk from each ftrace_raw_reg_event_foo() to its caller ftrace_event_enable_disable(). This avoids each regfunc trace event callbacks to handle a same error report that can be carried from the caller. 
See how much space this saves: text data bss dec hex filename 5345151 1961864 7103260 14410275 dbe223 vmlinux.o.old 5331487 1961864 7103260 14396611 dbacc3 vmlinux.o Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron LKML-Reference: <4B1DC4AC.802@cn.fujitsu.com> [start cmdline record before calling regfunc to avoid lost window of pid to comm resolution] Signed-off-by: Frederic Weisbecker --- include/trace/ftrace.h | 16 ++-------------- kernel/trace/trace_events.c | 20 +++++++++++++++----- kernel/trace/trace_syscalls.c | 10 ++-------- 3 files changed, 19 insertions(+), 27 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2af2f7a2c1b..0c21af85211 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -555,13 +555,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { - * int ret; - * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; + * return register_trace_(ftrace_event_); * } * * static void ftrace_unreg_event_(struct ftrace_event_call *unused) @@ -710,13 +704,7 @@ static void ftrace_raw_event_##call(proto) \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to %s\n", #call); \ - return ret; \ + return register_trace_##call(ftrace_raw_event_##call); \ } \ \ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 97b0b3aa166..189b09baf4f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -118,9 +118,11 @@ int trace_event_raw_init(struct ftrace_event_call *call) } 
EXPORT_SYMBOL_GPL(trace_event_raw_init); -static void ftrace_event_enable_disable(struct ftrace_event_call *call, +static int ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { + int ret = 0; + switch (enable) { case 0: if (call->enabled) { @@ -131,12 +133,20 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, break; case 1: if (!call->enabled) { - call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call); + ret = call->regfunc(call); + if (ret) { + tracing_stop_cmdline_record(); + pr_info("event trace: Could not enable event " + "%s\n", call->name); + break; + } + call->enabled = 1; } break; } + + return ret; } static void ftrace_clear_events(void) @@ -415,7 +425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 0: case 1: mutex_lock(&event_mutex); - ftrace_event_enable_disable(call, val); + ret = ftrace_event_enable_disable(call, val); mutex_unlock(&event_mutex); break; @@ -425,7 +435,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, *ppos += cnt; - return cnt; + return ret ? 
ret : cnt; } static ssize_t diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b957edd0ca3..75289f372dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -325,10 +325,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) ret = register_trace_sys_enter(ftrace_syscall_enter); - if (ret) { - pr_info("event trace: Could not activate" - "syscall entry trace point"); - } else { + if (!ret) { set_bit(num, enabled_enter_syscalls); sys_refcount_enter++; } @@ -362,10 +359,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) ret = register_trace_sys_exit(ftrace_syscall_exit); - if (ret) { - pr_info("event trace: Could not activate" - "syscall exit trace point"); - } else { + if (!ret) { set_bit(num, enabled_exit_syscalls); sys_refcount_exit++; } -- cgit v1.2.3-70-g09d2 From e00bf2ec60605eb95687b7a0c3b83c87c48541dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Dec 2009 11:17:29 +0800 Subject: tracing: Change event->profile_count to be int type Like total_profile_count, struct ftrace_event_call::profile_count is protected by event_mutex, so it doesn't need to be atomic_t. 
Signed-off-by: Li Zefan Acked-by: Steven Rostedt Cc: Jason Baron Cc: Masami Hiramatsu Cc: Peter Zijlstra LKML-Reference: <4B1DC549.5010705@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 2 +- include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - kernel/trace/trace_event_profile.c | 6 +++--- kernel/trace/trace_kprobe.c | 1 - 5 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index db97c64ce0e..2233c98d80d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -131,7 +131,7 @@ struct ftrace_event_call { void *mod; void *data; - atomic_t profile_count; + int profile_count; int (*profile_enable)(struct ftrace_event_call *); void (*profile_disable)(struct ftrace_event_call *); }; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 94ac28437be..72d69860d90 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -102,12 +102,10 @@ struct perf_event_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysenter_enable, \ .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = prof_sysexit_enable, \ .profile_disable = prof_sysexit_disable, #else diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0c21af85211..73523151a73 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -629,7 +629,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #ifdef CONFIG_EVENT_PROFILE #define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ .profile_enable = ftrace_profile_enable_##call, \ .profile_disable = ftrace_profile_disable_##call, diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 
d9c60f80aa0..9e25573242c 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) char *buf; int ret = -ENOMEM; - if (atomic_inc_return(&event->profile_count)) + if (event->profile_count++ > 0) return 0; if (!total_profile_count) { @@ -56,7 +56,7 @@ fail_buf_nmi: perf_trace_buf = NULL; } fail_buf: - atomic_dec(&event->profile_count); + event->profile_count--; return ret; } @@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) { char *buf, *nmi_buf; - if (!atomic_add_negative(-1, &event->profile_count)) + if (--event->profile_count > 0) return; event->profile_disable(event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e3c80e92589..6ed223447a3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1426,7 +1426,6 @@ static int register_probe_event(struct trace_probe *tp) call->unregfunc = probe_event_disable; #ifdef CONFIG_EVENT_PROFILE - atomic_set(&call->profile_count, -1); call->profile_enable = probe_profile_enable; call->profile_disable = probe_profile_disable; #endif -- cgit v1.2.3-70-g09d2 From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:38 +0800 Subject: tracing: Fix sign fields in ftrace_define_fields_##call() Add is_signed_type() call to trace_define_field() in ftrace macros. The code previously just passed in 0 (false), disregarding whether or not the field was actually a signed type. 
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 7 ++++--- kernel/trace/trace_export.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a73..c6fe03e902c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d..d4fa5dc1ee4 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - 
FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; -- cgit v1.2.3-70-g09d2