From e4a9ea5ee7c8812a7bf0c3fb725ceeaa3d4c2fcc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 09:15:30 -0500 Subject: tracing: Replace trace_event struct array with pointer array Currently the trace_event structures are placed in the _ftrace_events section, and at link time, the linker makes one large array of all the trace_event structures. On boot up, this array is read (much like the initcall sections) and the events are processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. Two structures in the same file may be placed awkwardly, because gcc has no clue that they are suppose to be in an array. A hack was used previous to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the _ftrace_event section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The _ftrace_event section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Cc: Mathieu Desnoyers Acked-by: David S. Miller Signed-off-by: Steven Rostedt --- include/linux/module.h | 2 +- include/linux/syscalls.h | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index e7c6385c668..7695a303bb5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,7 +389,7 @@ struct module unsigned int num_trace_bprintk_fmt; #endif #ifdef CONFIG_EVENT_TRACING - struct ftrace_event_call *trace_events; + struct ftrace_event_call **trace_events; unsigned int num_trace_events; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18cd0684fc4..45508fec366 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -128,28 +128,30 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_enter_##sname = &event_enter_##sname; \ __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata \ __attribute__((__aligned__(4))) __syscall_meta_##sname; \ static struct ftrace_event_call __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_events"))) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ }; \ + static struct ftrace_event_call __used \ + __attribute__((section("_ftrace_events"))) \ + *__event_exit_##sname = &event_exit_##sname; \ __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_METADATA(sname, nb) \ -- cgit v1.2.3-70-g09d2 From 654986462939cd7ec18f276c6379a334dac106a7 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 26 Jan 2011 17:26:22 -0500 Subject: tracepoints: Fix section alignment using pointer array Make the tracepoints more robust, making them solid enough to handle compiler changes by not relying on anything based on compiler-specific behavior with respect to structure alignment. Implement an approach proposed by David Miller: use an array of const pointers to refer to the individual structures, and export this pointer array through the linker script rather than the structures per se. It will consume 32 extra bytes per tracepoint (24 for structure padding and 8 for the pointers), but are less likely to break due to compiler changes. History: commit 7e066fb8 tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() added the aligned(32) type and variable attribute to the tracepoint structures to deal with gcc happily aligning statically defined structures on 32-byte multiples. One attempt was to use a 8-byte alignment for tracepoint structures by applying both the variable and type attribute to tracepoint structures definitions and declarations. It worked fine with gcc 4.5.1, but broke with gcc 4.4.4 and 4.4.5. The reason is that the "aligned" attribute only specify the _minimum_ alignment for a structure, leaving both the compiler and the linker free to align on larger multiples. Because tracepoint.c expects the structures to be placed as an array within each section, up-alignment cause NULL-pointer exceptions due to the extra unexpected padding. (this patch applies on top of -tip) Signed-off-by: Mathieu Desnoyers Acked-by: David S. Miller LKML-Reference: <20110126222622.GA10794@Krystal> CC: Frederic Weisbecker CC: Ingo Molnar CC: Thomas Gleixner CC: Andrew Morton CC: Peter Zijlstra CC: Rusty Russell Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 8 +++++--- include/linux/module.h | 2 +- include/linux/tracepoint.h | 35 ++++++++++++++++++++--------------- kernel/module.c | 16 ++++++++-------- kernel/tracepoint.c | 31 ++++++++++++++++--------------- 5 files changed, 50 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f53708be95e..57b1b6811b6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -166,10 +166,8 @@ CPU_KEEP(exit.data) \ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ - . = ALIGN(32); \ - VMLINUX_SYMBOL(__start___tracepoints) = .; \ + STRUCT_ALIGN(); \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; \ /* implement dynamic printk debug */ \ . = ALIGN(8); \ VMLINUX_SYMBOL(__start___verbose) = .; \ @@ -218,6 +216,10 @@ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ *(__vermagic) /* Kernel version magic */ \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \ + *(__tracepoints_ptrs) /* Tracepoints: pointer array */\ + VMLINUX_SYMBOL(__stop___tracepoints_ptrs) = .; \ *(__markers_strings) /* Markers: strings */ \ *(__tracepoints_strings)/* Tracepoints: strings */ \ } \ diff --git a/include/linux/module.h b/include/linux/module.h index 7695a303bb5..9bdf27c7615 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -377,7 +377,7 @@ struct module keeping pointers to this stuff */ char *args; #ifdef CONFIG_TRACEPOINTS - struct tracepoint *tracepoints; + struct tracepoint * const *tracepoints_ptrs; unsigned int num_tracepoints; #endif #ifdef HAVE_JUMP_LABEL diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c6814616653..97c84a58efb 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -33,12 +33,7 @@ struct tracepoint { void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; -} __attribute__((aligned(32))); /* - * Aligned on 32 bytes because it is - * globally visible and gcc happily - * align these on the structure size. - * Keep in sync with vmlinux.lds.h. - */ +}; /* * Connect a probe to a tracepoint. @@ -61,15 +56,15 @@ extern void tracepoint_probe_update_all(void); struct tracepoint_iter { struct module *module; - struct tracepoint *tracepoint; + struct tracepoint * const *tracepoint; }; extern void tracepoint_iter_start(struct tracepoint_iter *iter); extern void tracepoint_iter_next(struct tracepoint_iter *iter); extern void tracepoint_iter_stop(struct tracepoint_iter *iter); extern void tracepoint_iter_reset(struct tracepoint_iter *iter); -extern int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end); +extern int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, + struct tracepoint * const *begin, struct tracepoint * const *end); /* * tracepoint_synchronize_unregister must be called between the last tracepoint @@ -84,11 +79,13 @@ static inline void tracepoint_synchronize_unregister(void) #define PARAMS(args...) args #ifdef CONFIG_TRACEPOINTS -extern void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end); +extern +void tracepoint_update_probe_range(struct tracepoint * const *begin, + struct tracepoint * const *end); #else -static inline void tracepoint_update_probe_range(struct tracepoint *begin, - struct tracepoint *end) +static inline +void tracepoint_update_probe_range(struct tracepoint * const *begin, + struct tracepoint * const *end) { } #endif /* CONFIG_TRACEPOINTS */ @@ -174,12 +171,20 @@ do_trace: \ { \ } +/* + * We have no guarantee that gcc and the linker won't up-align the tracepoint + * structures, so we create an array of pointers that will be used for iteration + * on the tracepoints. + */ #define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, reg, unreg, NULL } + __attribute__((section("__tracepoints"))) = \ + { __tpstrtab_##name, 0, reg, unreg, NULL }; \ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name; #define DEFINE_TRACE(name) \ DEFINE_TRACE_FN(name, NULL, NULL); diff --git a/kernel/module.c b/kernel/module.c index 34e00b708fa..efa290ea94b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_TRACEPOINTS - mod->tracepoints = section_objs(info, "__tracepoints", - sizeof(*mod->tracepoints), - &mod->num_tracepoints); + mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", + sizeof(*mod->tracepoints_ptrs), + &mod->num_tracepoints); #endif #ifdef HAVE_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", @@ -3393,7 +3393,7 @@ void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, - struct tracepoint *tp) + struct tracepoint * const *tp) { } EXPORT_SYMBOL(module_layout); @@ -3407,8 +3407,8 @@ void module_update_tracepoints(void) mutex_lock(&module_mutex); list_for_each_entry(mod, &modules, list) if (!mod->taints) - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); mutex_unlock(&module_mutex); } @@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter) else if (iter_mod > iter->module) iter->tracepoint = NULL; found = tracepoint_get_iter_range(&iter->tracepoint, - iter_mod->tracepoints, - iter_mod->tracepoints + iter_mod->tracepoints_ptrs, + iter_mod->tracepoints_ptrs + iter_mod->num_tracepoints); if (found) { iter->module = iter_mod; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d4..68187af4889 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -27,8 +27,8 @@ #include #include -extern struct tracepoint __start___tracepoints[]; -extern struct tracepoint __stop___tracepoints[]; +extern struct tracepoint * const __start___tracepoints_ptrs[]; +extern struct tracepoint * const __stop___tracepoints_ptrs[]; /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem) * * Updates the probe callback corresponding to a range of tracepoints. */ -void -tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) +void tracepoint_update_probe_range(struct tracepoint * const *begin, + struct tracepoint * const *end) { - struct tracepoint *iter; + struct tracepoint * const *iter; struct tracepoint_entry *mark_entry; if (!begin) @@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) mutex_lock(&tracepoints_mutex); for (iter = begin; iter < end; iter++) { - mark_entry = get_tracepoint(iter->name); + mark_entry = get_tracepoint((*iter)->name); if (mark_entry) { - set_tracepoint(&mark_entry, iter, + set_tracepoint(&mark_entry, *iter, !!mark_entry->refcount); } else { - disable_tracepoint(iter); + disable_tracepoint(*iter); } } mutex_unlock(&tracepoints_mutex); @@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) static void tracepoint_update_probes(void) { /* Core kernel tracepoints */ - tracepoint_update_probe_range(__start___tracepoints, - __stop___tracepoints); + tracepoint_update_probe_range(__start___tracepoints_ptrs, + __stop___tracepoints_ptrs); /* tracepoints in modules. */ module_update_tracepoints(); } @@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); * Will return the first tracepoint in the range if the input tracepoint is * NULL. */ -int tracepoint_get_iter_range(struct tracepoint **tracepoint, - struct tracepoint *begin, struct tracepoint *end) +int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, + struct tracepoint * const *begin, struct tracepoint * const *end) { if (!*tracepoint && begin != end) { *tracepoint = begin; @@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) /* Core kernel tracepoints */ if (!iter->module) { found = tracepoint_get_iter_range(&iter->tracepoint, - __start___tracepoints, __stop___tracepoints); + __start___tracepoints_ptrs, + __stop___tracepoints_ptrs); if (found) goto end; } @@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: case MODULE_STATE_GOING: - tracepoint_update_probe_range(mod->tracepoints, - mod->tracepoints + mod->num_tracepoints); + tracepoint_update_probe_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints); break; } return 0; -- cgit v1.2.3-70-g09d2 From 3d56e331b6537671c66f1b510bed0f1e0331dfc8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Feb 2011 17:06:09 -0500 Subject: tracing: Replace syscall_meta_data struct array with pointer array Currently the syscall_meta structures for the syscall tracepoints are placed in the __syscall_metadata section, and at link time, the linker makes one large array of all these syscall metadata structures. On boot up, this array is read (much like the initcall sections) and the syscall data is processed. The problem is that there is no guarantee that gcc will place complex structures nicely together in an array format. Two structures in the same file may be placed awkwardly, because gcc has no clue that they are suppose to be in an array. A hack was used previous to force the alignment to 4, to pack the structures together. But this caused alignment issues with other architectures (sparc). Instead of packing the structures into an array, the structures' addresses are now put into the __syscall_metadata section. As pointers are always the natural alignment, gcc should always pack them tightly together (otherwise initcall, extable, etc would also fail). By having the pointers to the structures in the section, we can still iterate the trace_events without causing unnecessary alignment problems with other architectures, or depending on the current behaviour of gcc that will likely change in the future just to tick us kernel developers off a little more. The __syscall_metadata section is also moved into the .init.data section as it is now only needed at boot up. Suggested-by: David Miller Acked-by: David S. Miller Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/asm-generic/vmlinux.lds.h | 9 ++++----- include/linux/syscalls.h | 18 +++++++++--------- kernel/trace/trace_syscalls.c | 19 ++++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 57b1b6811b6..fe77e3395b4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -141,7 +141,8 @@ #endif #ifdef CONFIG_FTRACE_SYSCALLS -#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ +#define TRACE_SYSCALLS() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ *(__syscalls_metadata) \ VMLINUX_SYMBOL(__stop_syscalls_metadata) = .; #else @@ -175,10 +176,7 @@ VMLINUX_SYMBOL(__stop___verbose) = .; \ LIKELY_PROFILE() \ BRANCH_PROFILE() \ - TRACE_PRINTKS() \ - \ - STRUCT_ALIGN(); \ - TRACE_SYSCALLS() + TRACE_PRINTKS() /* * Data section helpers @@ -483,6 +481,7 @@ *(.init.rodata) \ MCOUNT_REC() \ FTRACE_EVENTS() \ + TRACE_SYSCALLS() \ DEV_DISCARD(init.rodata) \ CPU_DISCARD(init.rodata) \ MEM_DISCARD(init.rodata) \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 45508fec366..98664db1be4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -125,8 +125,7 @@ extern struct trace_event_functions enter_syscall_print_funcs; extern struct trace_event_functions exit_syscall_print_funcs; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ - static struct syscall_metadata \ - __attribute__((__aligned__(4))) __syscall_meta_##sname; \ + static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call __used \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ @@ -140,8 +139,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) #define SYSCALL_TRACE_EXIT_EVENT(sname) \ - static struct syscall_metadata \ - __attribute__((__aligned__(4))) __syscall_meta_##sname; \ + static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call __used \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ @@ -158,8 +156,6 @@ extern struct trace_event_functions exit_syscall_print_funcs; SYSCALL_TRACE_ENTER_EVENT(sname); \ SYSCALL_TRACE_EXIT_EVENT(sname); \ static struct syscall_metadata __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ .name = "sys"#sname, \ .nb_args = nb, \ @@ -168,14 +164,15 @@ extern struct trace_event_functions exit_syscall_print_funcs; .enter_event = &event_enter_##sname, \ .exit_event = &event_exit_##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \ - }; + }; \ + static struct syscall_metadata __used \ + __attribute__((section("__syscalls_metadata"))) \ + *__p_syscall_meta_##sname = &__syscall_meta_##sname; #define SYSCALL_DEFINE0(sname) \ SYSCALL_TRACE_ENTER_EVENT(_##sname); \ SYSCALL_TRACE_EXIT_EVENT(_##sname); \ static struct syscall_metadata __used \ - __attribute__((__aligned__(4))) \ - __attribute__((section("__syscalls_metadata"))) \ __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ @@ -183,6 +180,9 @@ extern struct trace_event_functions exit_syscall_print_funcs; .exit_event = &event_exit__##sname, \ .enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \ }; \ + static struct syscall_metadata __used \ + __attribute__((section("__syscalls_metadata"))) \ + *__p_syscall_meta_##sname = &__syscall_meta__##sname; \ asmlinkage long sys_##sname(void) #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b706529b4fc..5c9fe08d209 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = { .raw_init = init_syscall_trace, }; -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; +extern struct syscall_metadata *__start_syscalls_metadata[]; +extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +static __init struct syscall_metadata * +find_syscall_meta(unsigned long syscall) { - struct syscall_metadata *start; - struct syscall_metadata *stop; + struct syscall_metadata **start; + struct syscall_metadata **stop; char str[KSYM_SYMBOL_LEN]; - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; + start = __start_syscalls_metadata; + stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { @@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) * with "SyS" instead of "sys", leading to an unwanted * mismatch. */ - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; + if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + return *start; } return NULL; } -- cgit v1.2.3-70-g09d2