From 13b9b6e746d753d43270a78dd39694912646b5d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:19:24 -0500 Subject: tracing: Fix module use of trace_bprintk() On use of trace_printk() there's a macro that determines if the format is static or a variable. If it is static, it defaults to __trace_bprintk() otherwise it uses __trace_printk(). A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we discussed a way to allow modules to use it. The difference between __trace_bprintk() and __trace_printk() is that for faster processing, just the format and args are stored in the trace instead of running it through a sprintf function. In order to do this, the format used by the __trace_bprintk() had to be persistent. See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec The problem comes with trace_bprintk() where the module is unloaded. The pointer left in the buffer is still pointing to the format. To solve this issue, the formats in the module were copied into kernel core. If the same format was used, they would use the same copy (to prevent memory leak). This all worked well until we tried to merge everything. At the time this was written, Lai Jiangshan, Frederic Weisbecker, Ingo Molnar and myself were all touching the same code. When this was merged, we lost the part of it that was in module.c. This kept out the copying of the formats and unloading the module could cause bad pointers left in the ring buffer. This patch adds back (with updates required for current kernel) the module code that sets up the necessary pointers. Cc: Lai Jiangshan Cc: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..d190664f25f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * mod->num_trace_events, GFP_KERNEL); #endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_bprintk_fmt_start, + sizeof(*mod->trace_bprintk_fmt_start) * + mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, "__mcount_loc", -- cgit v1.2.3-70-g09d2 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch have been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 +++ arch/x86/kernel/ftrace.c | 3 + include/linux/module.h | 11 ++- kernel/module.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414..45143bbcfe5 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2..298448656b6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b96..ddaa689d71b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,6 +308,9 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* Size of RO sections of the module (text+rodata) */ + unsigned int init_ro_size, core_ro_size; + /* Arch-specific module values */ struct mod_arch_specific arch; @@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } - #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -687,6 +689,13 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +extern void set_all_modules_text_rw(void); +extern void set_all_modules_text_ro(void); +#else +static inline void set_all_modules_text_rw(void) { } +static inline void set_all_modules_text_ro(void) { } +#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..ba421e6b4ad 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -70,6 +71,26 @@ #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +/* + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. + */ +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +{ + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +/* Setting memory back to RW+NX before releasing it */ +void unset_section_ro_nx(struct module *mod, void *module_region) +{ + unsigned long total_pages; + + if (mod->module_core == module_region) { + /* Set core as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); + set_memory_nx((unsigned long)mod->module_core, total_pages); + set_memory_rw((unsigned long)mod->module_core, total_pages); + + } else if (mod->module_init == module_region) { + /* Set init as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); + set_memory_nx((unsigned long)mod->module_init, total_pages); + set_memory_rw((unsigned long)mod->module_init, total_pages); + } +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); +} + +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +#endif + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } DEBUGP("Init section allocation order:\n"); @@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + /* Done! */ trace_module_load(mod); return mod; @@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-70-g09d2 From 94462ad3b14739d158a1ab87bb30008c1e5a6bc1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Nov 2010 13:15:42 -0500 Subject: module: Move RO/NX module protection to after ftrace module update The commit: 84e1c6bb38eb318e456558b610396d9f1afaabf0 x86: Add RO/NX protection for loadable kernel modules Broke the function tracer with this output: ------------[ cut here ]------------ WARNING: at kernel/trace/ftrace.c:1014 ftrace_bug+0x114/0x171() Hardware name: Precision WorkStation 470 Modules linked in: i2c_core(+) Pid: 86, comm: modprobe Not tainted 2.6.37-rc2+ #68 Call Trace: [] warn_slowpath_common+0x85/0x9d [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] warn_slowpath_null+0x1a/0x1c [] ftrace_bug+0x114/0x171 [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ftrace_process_locs+0x1ae/0x274 [] ? __process_new_adapter+0x7/0x34 [i2c_core] [] ftrace_module_notify+0x39/0x44 [] notifier_call_chain+0x37/0x63 [] __blocking_notifier_call_chain+0x46/0x5b [] blocking_notifier_call_chain+0x14/0x16 [] sys_init_module+0x73/0x1f3 [] system_call_fastpath+0x16/0x1b ---[ end trace 2aff4f4ca53ec746 ]--- ftrace faulted on writing [] __process_new_adapter+0x7/0x34 [i2c_core] The cause was that the module text was set to read only before ftrace could convert the calls to mcount to nops. Thus, the conversions failed due to not being able to write to the text locations. The simple fix is to move setting the module to read only after the module notifiers are called (where ftrace sets the module mcounts to nops). Reported-by: Peter Zijlstra Acked-by: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/module.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 562f665c721..34e00b708fa 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2816,18 +2816,6 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); - /* Set RO and NX regions for core */ - set_section_ro_nx(mod->module_core, - mod->core_text_size, - mod->core_ro_size, - mod->core_size); - - /* Set RO and NX regions for init */ - set_section_ro_nx(mod->module_init, - mod->init_text_size, - mod->init_ro_size, - mod->init_size); - /* Done! */ trace_module_load(mod); return mod; @@ -2888,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) -- cgit v1.2.3-70-g09d2