From bd95b88d9e51fcbf392a7e90338a8fcc3499cbd6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Oct 2008 09:31:27 -0400 Subject: ftrace: release functions from hash The x86 architecture uses a static recording of mcount caller locations and is not affected by this patch. For architectures still using the dynamic ftrace daemon, this patch is critical. It removes the race between the recording of a function that calls mcount, the unloading of a module, and the ftrace daemon updating the call sites. This patch adds the releasing of the hash functions that the daemon uses to update the mcount call sites. When a module is unloaded, not only are the replaced call site table update, but now so is the hash recorded functions that the ftrace daemon will use. Again, architectures that implement MCOUNT_RECORD are not affected by this (which currently only x86 has). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4dda4f60a2a..1f54a94189f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -164,10 +164,14 @@ static DEFINE_SPINLOCK(ftrace_hash_lock); #define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags) #define ftrace_hash_unlock(flags) \ spin_unlock_irqrestore(&ftrace_hash_lock, flags) +static void ftrace_release_hash(unsigned long start, unsigned long end); #else /* This is protected via the ftrace_lock with MCOUNT_RECORD. */ #define ftrace_hash_lock(flags) do { (void)(flags); } while (0) #define ftrace_hash_unlock(flags) do { } while(0) +static inline void ftrace_release_hash(unsigned long start, unsigned long end) +{ +} #endif /* @@ -347,6 +351,7 @@ void ftrace_release(void *start, unsigned long size) } spin_unlock(&ftrace_lock); + ftrace_release_hash(s, e); } static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) @@ -1659,6 +1664,44 @@ void __init ftrace_init(void) ftrace_disabled = 1; } #else /* CONFIG_FTRACE_MCOUNT_RECORD */ + +static void ftrace_release_hash(unsigned long start, unsigned long end) +{ + struct dyn_ftrace *rec; + struct hlist_node *t, *n; + struct hlist_head *head, temp_list; + unsigned long flags; + int i, cpu; + + preempt_disable_notrace(); + + /* disable incase we call something that calls mcount */ + cpu = raw_smp_processor_id(); + per_cpu(ftrace_shutdown_disable_cpu, cpu)++; + + ftrace_hash_lock(flags); + + for (i = 0; i < FTRACE_HASHSIZE; i++) { + INIT_HLIST_HEAD(&temp_list); + head = &ftrace_hash[i]; + + /* all CPUS are stopped, we are safe to modify code */ + hlist_for_each_entry_safe(rec, t, n, head, node) { + if (rec->flags & FTRACE_FL_FREE) + continue; + + if ((rec->ip >= start) && (rec->ip < end)) + ftrace_free_rec(rec); + } + } + + ftrace_hash_unlock(flags); + + per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + preempt_enable_notrace(); + +} + static int ftraced(void *ignore) { unsigned long usecs; -- cgit v1.2.3-70-g09d2 From c2db8054c1eaf99983d8deee347876b01c26c2cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:11 -0400 Subject: ftrace: fix depends A lot of tracers have HAVE_FTRACE as a dependent config where it really should not. The HAVE_FTRACE is a misnomer (soon to be fixed) and describes if the architecture has the function tracer (mcount) implemented. The ftrace infrastructure is implemented in all archs. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 1cb3e1f616a..5866edbc2ed 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,7 +49,6 @@ config IRQSOFF_TRACER default n depends on TRACE_IRQFLAGS_SUPPORT depends on GENERIC_TIME - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACE_IRQFLAGS select TRACING @@ -73,7 +72,6 @@ config PREEMPT_TRACER default n depends on GENERIC_TIME depends on PREEMPT - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select TRACER_MAX_TRACE @@ -101,7 +99,6 @@ config SYSPROF_TRACER config SCHED_TRACER bool "Scheduling Latency Tracer" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select CONTEXT_SWITCH_TRACER @@ -112,7 +109,6 @@ config SCHED_TRACER config CONTEXT_SWITCH_TRACER bool "Trace process context switches" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING select MARKERS @@ -122,7 +118,6 @@ config CONTEXT_SWITCH_TRACER config BOOT_TRACER bool "Trace boot initcalls" - depends on HAVE_FTRACE depends on DEBUG_KERNEL select TRACING help -- cgit v1.2.3-70-g09d2 From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:12 -0400 Subject: ftrace: rename FTRACE to FUNCTION_TRACER Due to confusion between the ftrace infrastructure and the gcc profiling tracer "ftrace", this patch renames the config options from FTRACE to FUNCTION_TRACER. The other two names that are offspring from FTRACE DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same. This patch was generated mostly by script, and partially by hand. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Makefile | 2 +- arch/arm/Kconfig | 4 ++-- arch/arm/boot/compressed/Makefile | 2 +- arch/arm/include/asm/ftrace.h | 2 +- arch/arm/kernel/armksyms.c | 2 +- arch/arm/kernel/entry-common.S | 4 ++-- arch/powerpc/Kconfig | 2 +- arch/powerpc/Makefile | 2 +- arch/powerpc/include/asm/ftrace.h | 2 +- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/ppc_ksyms.c | 2 +- arch/powerpc/platforms/powermac/Makefile | 2 +- arch/sparc64/Kconfig | 2 +- arch/sparc64/Kconfig.debug | 2 +- arch/sparc64/lib/mcount.S | 4 ++-- arch/x86/Kconfig | 2 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/i386_ksyms_32.c | 2 +- arch/x86/kernel/x8664_ksyms_64.c | 2 +- arch/x86/xen/Makefile | 2 +- include/asm-x86/ftrace.h | 4 ++-- include/linux/ftrace.h | 12 ++++++------ kernel/Makefile | 4 ++-- kernel/sysctl.c | 2 +- kernel/trace/Kconfig | 17 +++++++++-------- kernel/trace/Makefile | 6 +++--- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_irqsoff.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- kernel/trace/trace_selftest.c | 4 ++-- lib/Makefile | 2 +- 36 files changed, 61 insertions(+), 60 deletions(-) (limited to 'kernel') diff --git a/Makefile b/Makefile index 16e3fbb968a..b7eb70b13ca 100644 --- a/Makefile +++ b/Makefile @@ -536,7 +536,7 @@ KBUILD_CFLAGS += -g KBUILD_AFLAGS += -gdwarf-2 endif -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -pg endif diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4853f9df37b..c2f18ea4050 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -16,8 +16,8 @@ config ARM select HAVE_ARCH_KGDB select HAVE_KPROBES if (!XIP_KERNEL) select HAVE_KRETPROBES if (HAVE_KPROBES) - select HAVE_FTRACE if (!XIP_KERNEL) - select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE) + select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) + select HAVE_DYNAMIC_FTRACE if (HAVE_FUNCTION_TRACER) select HAVE_GENERIC_DMA_COHERENT help The ARM series is a line of low-power-consumption RISC chip designs diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 7a03f200788..c47f2a3f8f8 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -70,7 +70,7 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ targets := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \ head.o misc.o $(OBJS) -ifeq ($(CONFIG_FTRACE),y) +ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) endif diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 584ef9a8e5a..39c8bc1a006 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -1,7 +1,7 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 2357b1cf1cf..c74f766ffc1 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -183,6 +183,6 @@ EXPORT_SYMBOL(_find_next_bit_be); EXPORT_SYMBOL(copy_page); -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(mcount); #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 3aa14dcc5ba..06269ea375c 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -101,7 +101,7 @@ ENDPROC(ret_from_fork) #undef CALL #define CALL(x) .long x -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) stmdb sp!, {r0-r3, lr} @@ -149,7 +149,7 @@ trace: ftrace_stub: mov pc, lr -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ /*============================================================================= * SWI handler diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 380baa1780e..97d86702e2d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -112,7 +112,7 @@ config PPC bool default y select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE + select HAVE_FUNCTION_TRACER select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_IDE select HAVE_IOREMAP_PROT diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 24dd1a37f8f..1f066706994 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -122,7 +122,7 @@ KBUILD_CFLAGS += -mcpu=powerpc endif # Work around a gcc code-gen bug with -fno-omit-frame-pointer. -ifeq ($(CONFIG_FTRACE),y) +ifeq ($(CONFIG_FUNCTION_TRACER),y) KBUILD_CFLAGS += -mno-sched-epilog endif diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index de921326cca..b298f7a631e 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -1,7 +1,7 @@ #ifndef _ASM_POWERPC_FTRACE #define _ASM_POWERPC_FTRACE -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fdb58253fa5..92673b43858 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,7 +12,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1cbbf703364..7ecc0d1855c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1158,7 +1158,7 @@ machine_check_in_rtas: #endif /* CONFIG_PPC_RTAS */ -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index fd8b4bae9b0..e6d52845854 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -884,7 +884,7 @@ _GLOBAL(enter_prom) mtlr r0 blr -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 8edc2359c41..260089dccfb 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(single_step_exception); EXPORT_SYMBOL(sys_sigreturn); #endif -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index be60d64be7a..50f16939255 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -1,6 +1,6 @@ CFLAGS_bootx_init.o += -fPIC -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog endif diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 5446e2a499b..d269400d286 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -12,7 +12,7 @@ config SPARC64 bool default y select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE + select HAVE_FUNCTION_TRACER select HAVE_IDE select HAVE_LMB select HAVE_ARCH_KGDB diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug index d6d32d178fc..c40515c0669 100644 --- a/arch/sparc64/Kconfig.debug +++ b/arch/sparc64/Kconfig.debug @@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC config MCOUNT bool - depends on STACK_DEBUG || FTRACE + depends on STACK_DEBUG || FUNCTION_TRACER default y config FRAME_POINTER diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S index fad90ddb3a2..7ce9c65f359 100644 --- a/arch/sparc64/lib/mcount.S +++ b/arch/sparc64/lib/mcount.S @@ -93,7 +93,7 @@ mcount: nop 1: #endif -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE mov %o7, %o0 .globl mcount_call @@ -119,7 +119,7 @@ mcount_call: .size _mcount,.-_mcount .size mcount,.-mcount -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER .globl ftrace_stub .type ftrace_stub,#function ftrace_stub: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 40ee8080956..290e21aa774 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,7 +28,7 @@ config X86 select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE + select HAVE_FUNCTION_TRACER select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d41f0343dc..ec3d30136bf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4e4269c73bb..9d49facc21f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1149,7 +1149,7 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -1204,7 +1204,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 09e7145484c..b86f332c96a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,7 +61,7 @@ .code64 -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) retq @@ -138,7 +138,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index dd7ebee446a..43cec6bdda6 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -5,7 +5,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b545f371b5f..695e426aa35 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -12,7 +12,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 313947940a1..6dcefba7836 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg CFLAGS_REMOVE_time.o = -pg diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h index 1bb6f9bbe1a..233bb9b869c 100644 --- a/include/asm-x86/ftrace.h +++ b/include/asm-x86/ftrace.h @@ -1,7 +1,7 @@ #ifndef ASM_X86__FTRACE_H #define ASM_X86__FTRACE_H -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ @@ -19,6 +19,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #endif -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* ASM_X86__FTRACE_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be1..0e952958915 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,7 +8,7 @@ #include #include -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; extern int @@ -36,12 +36,12 @@ void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -#else /* !CONFIG_FTRACE */ +#else /* !CONFIG_FUNCTION_TRACER */ # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill_atomic(void) { } -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE # define FTRACE_HASHBITS 10 @@ -101,7 +101,7 @@ void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = 0; #endif } @@ -113,7 +113,7 @@ static inline void tracer_disable(void) */ static inline int __ftrace_enabled_save(void) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER int saved_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; return saved_ftrace_enabled; @@ -124,7 +124,7 @@ static inline int __ftrace_enabled_save(void) static inline void __ftrace_enabled_restore(int enabled) { -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = enabled; #endif } diff --git a/kernel/Makefile b/kernel/Makefile index 8f9ce7ec21b..85f588a9d0b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ CFLAGS_REMOVE_sched.o = -mno-spe -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files CFLAGS_REMOVE_lockdep.o = -pg CFLAGS_REMOVE_lockdep_proc.o = -pg @@ -86,7 +86,7 @@ obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_FTRACE) += trace/ +obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e4d6a..619eb9f3acd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -464,7 +464,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER { .ctl_name = CTL_UNNUMBERED, .procname = "ftrace_enabled", diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5866edbc2ed..3533c583df4 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1,11 +1,12 @@ # -# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: +# Architectures that offer an FUNCTION_TRACER implementation should +# select HAVE_FUNCTION_TRACER: # config NOP_TRACER bool -config HAVE_FTRACE +config HAVE_FUNCTION_TRACER bool select NOP_TRACER @@ -28,9 +29,9 @@ config TRACING select STACKTRACE select TRACEPOINTS -config FTRACE +config FUNCTION_TRACER bool "Kernel Function Tracer" - depends on HAVE_FTRACE + depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL select FRAME_POINTER select TRACING @@ -136,9 +137,9 @@ config BOOT_TRACER config STACK_TRACER bool "Trace max stack" - depends on HAVE_FTRACE + depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL - select FTRACE + select FUNCTION_TRACER select STACKTRACE help This special tracer records the maximum stack footprint of the @@ -155,7 +156,7 @@ config STACK_TRACER config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" - depends on FTRACE + depends on FUNCTION_TRACER depends on HAVE_DYNAMIC_FTRACE depends on DEBUG_KERNEL default y @@ -165,7 +166,7 @@ config DYNAMIC_FTRACE with a No-Op instruction) as they are called. A table is created to dynamically enable them again. - This way a CONFIG_FTRACE kernel is slightly larger, but otherwise + This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise has native performance as long as no tracing is active. The changes to the code are done by a kernel thread that diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index a85dfba88ba..c8228b1a49e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -1,7 +1,7 @@ # Do not instrument the tracer itself: -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) @@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif -obj-$(CONFIG_FTRACE) += libftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o -obj-$(CONFIG_FTRACE) += trace_functions.o +obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d345d649d07..aeb2f2505bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -851,7 +851,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) preempt_enable_notrace(); } -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER static void function_trace_call(unsigned long ip, unsigned long parent_ip) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f1f99572cde..6889ca48f1f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -335,7 +335,7 @@ void update_max_tr_single(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER void tracing_start_function_trace(void); void tracing_stop_function_trace(void); #else diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index a7db7f040ae..9c74071c10e 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -63,7 +63,7 @@ irq_trace(void) */ static __cacheline_aligned_in_smp unsigned long max_sequence; -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* * irqsoff uses its own tracer function to keep the overhead down: */ @@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, }; -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ /* * Should this new latency be reported/recorded? diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index fe4a252c236..3ae93f16b56 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock = static void __wakeup_reset(struct trace_array *tr); -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* * irqsoff uses its own tracer function to keep the overhead down: */ @@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, }; -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ /* * Should this new latency be reported/recorded? diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 09cf230d7ec..95815d26a04 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -226,7 +226,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) return ret; } -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_IRQSOFF_TRACER int diff --git a/lib/Makefile b/lib/Makefile index 16feaab057b..7cb65d85aeb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for some libs needed in the kernel. # -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) endif -- cgit v1.2.3-70-g09d2 From 3ce83aea86bf46fd1bff59d2e6d16f48fdce22fc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 19:06:13 -0400 Subject: ftrace: rename the ftrace tracer to function To avoid further confusion between the ftrace infrastructure and the function tracer. This patch renames the "ftrace" function tracer to "function". Now in available_tracers, instead of "ftrace" there will be "function". This makes more sense, since people will not know exactly what the "ftrace" tracer does. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index e90eb0c2c56..0f85a64003d 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr) static struct tracer function_trace __read_mostly = { - .name = "ftrace", + .name = "function", .init = function_trace_init, .reset = function_trace_reset, .ctrl_update = function_trace_ctrl_update, -- cgit v1.2.3-70-g09d2 From 81520a1b0649d0701205b818714a8c1e1cfbbb5b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Oct 2008 21:24:18 -0400 Subject: ftrace: stack tracer only record when on stack The stack trace API does not record if the stack is not on the current task's stack. That is, if the stack is the interrupt stack or NMI stack, the output does not show. Also, the size of those stacks are not consistent with the size of the thread stack, this makes the calculation of the stack size usually bogus. This all confuses the stack tracer. I unfortunately do not have time to fix all these problems, but this patch does record the worst stack when the stack pointer is on the tasks stack (instead of bogus numbers). The patch simply returns if the stack pointer is not on the task's stack. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_stack.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 74c5d9a3afa..be682b62fe5 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -44,6 +44,10 @@ static inline void check_stack(void) if (this_size <= max_stack_size) return; + /* we do not handle interrupt stacks yet */ + if (!object_is_on_stack(&this_size)) + return; + raw_local_irq_save(flags); __raw_spin_lock(&max_stack_lock); -- cgit v1.2.3-70-g09d2 From 3786fc710c32b61464c322e5cd0c3d1d34ae72d0 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Tue, 21 Oct 2008 19:49:09 -0400 Subject: irq: make variable static This variable is only used in the source file, so make it static. Signed-off-by: Roel Kluin Signed-off-by: Ingo Molnar --- kernel/irq/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index fac014a81b2..4d161c70ba5 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action) } } -void register_default_affinity_proc(void) +static void register_default_affinity_proc(void) { #ifdef CONFIG_SMP proc_create("irq/default_smp_affinity", 0600, NULL, -- cgit v1.2.3-70-g09d2 From 17d80fd07d35ae1d231b3378ee4f00ace54f9d31 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Oct 2008 16:31:18 +0200 Subject: tracing: create tracers menu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We seem to have plenty tracers, lets create a menu and not clutter the already cluttered debug menu more. Signed-off-by: Peter Zijlstra Acked-by: Frédéric Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3533c583df4..bc535cb91de 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -29,6 +29,8 @@ config TRACING select STACKTRACE select TRACEPOINTS +menu "Tracers" + config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER @@ -191,3 +193,5 @@ config FTRACE_STARTUP_TEST a series of tests are made to verify that the tracer is functioning properly. It will do tests on all the configured tracers of ftrace. + +endmenu -- cgit v1.2.3-70-g09d2 From 4ce72a2c063a7fa8e42a9435440ae3364115a58d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Oct 2008 15:25:26 +0800 Subject: sched: add CONFIG_SMP consistency a patch from Henrik Austad did this: >> Do not declare select_task_rq as part of sched_class when CONFIG_SMP is >> not set. Peter observed: > While a proper cleanup, could you do it by re-arranging the methods so > as to not create an additional ifdef? Do not declare select_task_rq and some other methods as part of sched_class when CONFIG_SMP is not set. Also gather those methods to avoid CONFIG_SMP mess. Idea-by: Henrik Austad Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Henrik Austad Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 +++++++----- kernel/sched_fair.c | 5 ++--- kernel/sched_idletask.c | 5 ++--- kernel/sched_rt.c | 5 ++--- 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f59c8e8597..c05b45faef1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -897,7 +897,6 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq); - int (*select_task_rq)(struct task_struct *p, int sync); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); @@ -905,6 +904,8 @@ struct sched_class { void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP + int (*select_task_rq)(struct task_struct *p, int sync); + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, @@ -916,16 +917,17 @@ struct sched_class { void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); -#endif - void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); - void (*task_new) (struct rq *rq, struct task_struct *p); void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); +#endif + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); + void (*task_new) (struct rq *rq, struct task_struct *p); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a0aa38b10fd..8de48a5da35 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1593,9 +1593,6 @@ static const struct sched_class fair_sched_class = { .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_fair, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_wakeup, @@ -1603,6 +1600,8 @@ static const struct sched_class fair_sched_class = { .put_prev_task = put_prev_task_fair, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_fair, + .load_balance = load_balance_fair, .move_one_task = move_one_task_fair, #endif diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dec4ccabe2f..8a21a2e28c1 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = { /* dequeue is not valid, we print a debug message there: */ .dequeue_task = dequeue_task_idle, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_idle, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_idle, @@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = { .put_prev_task = put_prev_task_idle, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_idle, + .load_balance = load_balance_idle, .move_one_task = move_one_task_idle, #endif diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index cdf5740ab03..c9aa5bede22 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1502,9 +1502,6 @@ static const struct sched_class rt_sched_class = { .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, -#ifdef CONFIG_SMP - .select_task_rq = select_task_rq_rt, -#endif /* CONFIG_SMP */ .check_preempt_curr = check_preempt_curr_rt, @@ -1512,6 +1509,8 @@ static const struct sched_class rt_sched_class = { .put_prev_task = put_prev_task_rt, #ifdef CONFIG_SMP + .select_task_rq = select_task_rq_rt, + .load_balance = load_balance_rt, .move_one_task = move_one_task_rt, .set_cpus_allowed = set_cpus_allowed_rt, -- cgit v1.2.3-70-g09d2 From 6ae2a0765ab764da11cc305058ee5333810228f4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Oct 2008 10:22:06 -0400 Subject: ring-buffer: fix free page The pages of a buffer was originally pointing to the page struct, it now points to the page address. The freeing of the page still uses the page frame free "__free_page" instead of the correct free_page to the address. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 94af1fe56bb..091aeefe321 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -130,7 +130,7 @@ struct buffer_page { static inline void free_buffer_page(struct buffer_page *bpage) { if (bpage->page) - __free_page(bpage->page); + free_page((unsigned long)bpage->page); kfree(bpage); } -- cgit v1.2.3-70-g09d2 From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:32:59 -0400 Subject: ftrace: return error on failed modified text. Have the ftrace_modify_code return error values: -EFAULT on error of reading the address -EINVAL if what is read does not match what it expected -EPERM if the write fails to update after a successful match. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 14 +++++++------- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- kernel/trace/ftrace.c | 21 +++++++++++++++------ 3 files changed, 44 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8821ceabf51..428291581cb 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -62,7 +62,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; - int ret; /* * Note: Due to modules and __init, code can @@ -72,15 +71,16 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) - return 1; + if (__copy_from_user_inatomic(replaced, (char __user *)ip, + MCOUNT_INSN_SIZE)) + return -EFAULT; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return 2; + return -EINVAL; - ret = __copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE); - WARN_ON_ONCE(ret); + if (__copy_to_user_inatomic((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)) + return -EPERM; sync_core(); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0e952958915..79fa10cbdcf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,13 +72,33 @@ extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_mcount_set(unsigned long *data); -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/** + * ftrace_modify_code - modify code segment + * @ip: the address of the code segment + * @old_code: the contents of what is expected to be there + * @new_code: the code to patch in + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code); + extern int skip_trace(unsigned long ip); extern void ftrace_release(void *start, unsigned long size); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1f54a94189f..b2de8de7735 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -596,22 +596,22 @@ ftrace_code_disable(struct dyn_ftrace *rec) { unsigned long ip; unsigned char *nop, *call; - int failed; + int ret; ip = rec->ip; nop = ftrace_nop_replace(); call = ftrace_call_replace(ip, mcount_addr); - failed = ftrace_modify_code(ip, call, nop); - if (failed) { - switch (failed) { - case 1: + ret = ftrace_modify_code(ip, call, nop); + if (ret) { + switch (ret) { + case -EFAULT: WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; - case 2: + case -EINVAL: WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); @@ -620,6 +620,15 @@ ftrace_code_disable(struct dyn_ftrace *rec) print_ip_ins(" replace: ", nop); printk(KERN_CONT "\n"); break; + case -EPERM: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); } rec->flags |= FTRACE_FL_FAILED; -- cgit v1.2.3-70-g09d2 From 81adbdc029ecc416d56563e7f159100181dd711d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:02 -0400 Subject: ftrace: only have ftrace_kill atomic When an anomaly is detected, we need a way to completely disable ftrace. Right now we have two functions: ftrace_kill and ftrace_kill_atomic. The ftrace_kill tries to do it in a "nice" way by converting everything back to a nop. The "nice" way is dangerous itself, so this patch removes it and only has the "atomic" version, which is all that is needed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 42 ++---------------------------------------- kernel/trace/trace.c | 2 +- 3 files changed, 4 insertions(+), 43 deletions(-) (limited to 'kernel') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 79fa10cbdcf..ac58e94668b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -40,7 +40,7 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define register_ftrace_function(ops) do { } while (0) # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) -static inline void ftrace_kill_atomic(void) { } +static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE @@ -117,7 +117,6 @@ static inline void ftrace_release(void *start, unsigned long size) { } /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -void ftrace_kill_atomic(void); static inline void tracer_disable(void) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b2de8de7735..93245ae046e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1549,22 +1549,6 @@ int ftrace_force_update(void) return ret; } -static void ftrace_force_shutdown(void) -{ - struct task_struct *task; - int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; - - mutex_lock(&ftraced_lock); - task = ftraced_task; - ftraced_task = NULL; - ftraced_suspend = -1; - ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); - - if (task) - kthread_stop(task); -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1795,17 +1779,16 @@ core_initcall(ftrace_dynamic_init); # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) -# define ftrace_force_shutdown() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ /** - * ftrace_kill_atomic - kill ftrace from critical sections + * ftrace_kill - kill ftrace * * This function should be used by panic code. It stops ftrace * but in a not so nice way. If you need to simply kill ftrace * from a non-atomic section, use ftrace_kill. */ -void ftrace_kill_atomic(void) +void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; @@ -1815,27 +1798,6 @@ void ftrace_kill_atomic(void) clear_ftrace_function(); } -/** - * ftrace_kill - totally shutdown ftrace - * - * This is a safety measure. If something was detected that seems - * wrong, calling this function will keep ftrace from doing - * any more modifications, and updates. - * used when something went wrong. - */ -void ftrace_kill(void) -{ - mutex_lock(&ftrace_sysctl_lock); - ftrace_disabled = 1; - ftrace_enabled = 0; - - clear_ftrace_function(); - mutex_unlock(&ftrace_sysctl_lock); - - /* Try to totally disable ftrace */ - ftrace_force_shutdown(); -} - /** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aeb2f2505bc..333a5162149 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3097,7 +3097,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ - ftrace_kill_atomic(); + ftrace_kill(); for_each_tracing_cpu(cpu) { atomic_inc(&global_trace.data[cpu]->disabled); -- cgit v1.2.3-70-g09d2 From 6912896e994ddaf06cc0f6d3f2098bc4b59bdd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:03 -0400 Subject: ftrace: add ftrace warn on to disable ftrace Add ftrace warn on to disable ftrace as well as report a warning. [ Thanks to Andrew Morton for suggesting using the WARN_ON return value ] Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 93245ae046e..b08996ca561 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,6 +32,18 @@ #include "trace.h" +#define FTRACE_WARN_ON(cond) \ + do { \ + if (WARN_ON(cond)) \ + ftrace_kill(); \ + } while (0) + +#define FTRACE_WARN_ON_ONCE(cond) \ + do { \ + if (WARN_ON_ONCE(cond)) \ + ftrace_kill(); \ + } while (0) + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; @@ -363,10 +375,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) rec = ftrace_free_records; if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); ftrace_free_records = NULL; - ftrace_disabled = 1; - ftrace_enabled = 0; return NULL; } @@ -415,7 +425,7 @@ ftrace_record_ip(unsigned long ip) key = hash_long(ip, FTRACE_HASHBITS); - WARN_ON_ONCE(key >= FTRACE_HASHSIZE); + FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); if (ftrace_ip_in_hash(ip, key)) goto out; @@ -607,12 +617,12 @@ ftrace_code_disable(struct dyn_ftrace *rec) if (ret) { switch (ret) { case -EFAULT: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); print_ip_sym(ip); break; case -EINVAL: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); print_ip_ins(" expected: ", call); @@ -621,12 +631,12 @@ ftrace_code_disable(struct dyn_ftrace *rec) printk(KERN_CONT "\n"); break; case -EPERM: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); print_ip_sym(ip); break; default: - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } @@ -1722,8 +1732,7 @@ static int ftraced(void *ignore) ftrace_update_cnt != 1 ? "s" : "", ftrace_update_tot_cnt, usecs, usecs != 1 ? "s" : ""); - ftrace_disabled = 1; - WARN_ON_ONCE(1); + FTRACE_WARN_ON_ONCE(1); } } mutex_unlock(&ftraced_lock); -- cgit v1.2.3-70-g09d2 From cb7be3b2fc2cf089ee52b16f0fd9ebb29e9944e1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:05 -0400 Subject: ftrace: remove daemon The ftrace daemon is complex and error prone. This patch strips it out of the code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 322 ++++-------------------------------------- kernel/trace/trace_selftest.c | 14 -- 2 files changed, 28 insertions(+), 308 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b08996ca561..e758cab0836 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -165,25 +165,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) } #ifdef CONFIG_DYNAMIC_FTRACE - #ifndef CONFIG_FTRACE_MCOUNT_RECORD -/* - * The hash lock is only needed when the recording of the mcount - * callers are dynamic. That is, by the caller themselves and - * not recorded via the compilation. - */ -static DEFINE_SPINLOCK(ftrace_hash_lock); -#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags) -#define ftrace_hash_unlock(flags) \ - spin_unlock_irqrestore(&ftrace_hash_lock, flags) -static void ftrace_release_hash(unsigned long start, unsigned long end); -#else -/* This is protected via the ftrace_lock with MCOUNT_RECORD. */ -#define ftrace_hash_lock(flags) do { (void)(flags); } while (0) -#define ftrace_hash_unlock(flags) do { } while(0) -static inline void ftrace_release_hash(unsigned long start, unsigned long end) -{ -} +# error Dynamic ftrace depends on MCOUNT_RECORD #endif /* @@ -194,8 +177,6 @@ static inline void ftrace_release_hash(unsigned long start, unsigned long end) */ static unsigned long mcount_addr = MCOUNT_ADDR; -static struct task_struct *ftraced_task; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -212,7 +193,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); -static DEFINE_MUTEX(ftraced_lock); static DEFINE_MUTEX(ftrace_regex_lock); struct ftrace_page { @@ -230,10 +210,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftraced_trigger; -static int ftraced_suspend; -static int ftraced_stop; - static int ftrace_record_suspend; static struct dyn_ftrace *ftrace_free_records; @@ -398,7 +374,6 @@ static void ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *node; - unsigned long flags; unsigned long key; int resched; int cpu; @@ -430,24 +405,18 @@ ftrace_record_ip(unsigned long ip) if (ftrace_ip_in_hash(ip, key)) goto out; - ftrace_hash_lock(flags); - /* This ip may have hit the hash before the lock */ if (ftrace_ip_in_hash(ip, key)) - goto out_unlock; + goto out; node = ftrace_alloc_dyn_node(ip); if (!node) - goto out_unlock; + goto out; node->ip = ip; ftrace_add_hash(node, key); - ftraced_trigger = 1; - - out_unlock: - ftrace_hash_unlock(flags); out: per_cpu(ftrace_shutdown_disable_cpu, cpu)--; @@ -647,7 +616,7 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int __ftrace_update_code(void *ignore); +static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { @@ -659,7 +628,7 @@ static int __ftrace_modify_code(void *data) * Update any recorded ips now that we have the * machine stopped */ - __ftrace_update_code(NULL); + ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -686,26 +655,9 @@ static void ftrace_run_update_code(int command) stop_machine(__ftrace_modify_code, &command, NULL); } -void ftrace_disable_daemon(void) -{ - /* Stop the daemon from calling kstop_machine */ - mutex_lock(&ftraced_lock); - ftraced_stop = 1; - mutex_unlock(&ftraced_lock); - - ftrace_force_update(); -} - -void ftrace_enable_daemon(void) -{ - mutex_lock(&ftraced_lock); - ftraced_stop = 0; - mutex_unlock(&ftraced_lock); - - ftrace_force_update(); -} - static ftrace_func_t saved_ftrace_func; +static int ftrace_start; +static DEFINE_MUTEX(ftrace_start_lock); static void ftrace_startup(void) { @@ -714,9 +666,9 @@ static void ftrace_startup(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - ftraced_suspend++; - if (ftraced_suspend == 1) + mutex_lock(&ftrace_start_lock); + ftrace_start++; + if (ftrace_start == 1) command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -729,7 +681,7 @@ static void ftrace_startup(void) ftrace_run_update_code(command); out: - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_shutdown(void) @@ -739,9 +691,9 @@ static void ftrace_shutdown(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - ftraced_suspend--; - if (!ftraced_suspend) + mutex_lock(&ftrace_start_lock); + ftrace_start--; + if (!ftrace_start) command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -754,7 +706,7 @@ static void ftrace_shutdown(void) ftrace_run_update_code(command); out: - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_startup_sysctl(void) @@ -764,15 +716,15 @@ static void ftrace_startup_sysctl(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); + mutex_lock(&ftrace_start_lock); /* Force update next time */ saved_ftrace_func = NULL; - /* ftraced_suspend is true if we want ftrace running */ - if (ftraced_suspend) + /* ftrace_start is true if we want ftrace running */ + if (ftrace_start) command |= FTRACE_ENABLE_CALLS; ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static void ftrace_shutdown_sysctl(void) @@ -782,20 +734,20 @@ static void ftrace_shutdown_sysctl(void) if (unlikely(ftrace_disabled)) return; - mutex_lock(&ftraced_lock); - /* ftraced_suspend is true if ftrace is running */ - if (ftraced_suspend) + mutex_lock(&ftrace_start_lock); + /* ftrace_start is true if ftrace is running */ + if (ftrace_start) command |= FTRACE_DISABLE_CALLS; ftrace_run_update_code(command); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); } static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int __ftrace_update_code(void *ignore) +static int ftrace_update_code(void *ignore) { int i, save_ftrace_enabled; cycle_t start, stop; @@ -869,7 +821,6 @@ static int __ftrace_update_code(void *ignore) stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftraced_trigger = 0; ftrace_enabled = save_ftrace_enabled; ftrace_record_suspend--; @@ -877,17 +828,6 @@ static int __ftrace_update_code(void *ignore) return 0; } -static int ftrace_update_code(void) -{ - if (unlikely(ftrace_disabled) || - !ftrace_enabled || !ftraced_trigger) - return 0; - - stop_machine(__ftrace_update_code, NULL, NULL); - - return 1; -} - static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) { struct ftrace_page *pg; @@ -1425,10 +1365,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) } mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - if (iter->filtered && ftraced_suspend && ftrace_enabled) + mutex_lock(&ftrace_start_lock); + if (iter->filtered && ftrace_start && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); - mutex_unlock(&ftraced_lock); + mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); kfree(iter); @@ -1448,55 +1388,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file) return ftrace_regex_release(inode, file, 0); } -static ssize_t -ftraced_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - /* don't worry about races */ - char *buf = ftraced_stop ? "disabled\n" : "enabled\n"; - int r = strlen(buf); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -ftraced_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - long val; - int ret; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - if (strncmp(buf, "enable", 6) == 0) - val = 1; - else if (strncmp(buf, "disable", 7) == 0) - val = 0; - else { - buf[cnt] = 0; - - ret = strict_strtoul(buf, 10, &val); - if (ret < 0) - return ret; - - val = !!val; - } - - if (val) - ftrace_enable_daemon(); - else - ftrace_disable_daemon(); - - filp->f_pos += cnt; - - return cnt; -} - static struct file_operations ftrace_avail_fops = { .open = ftrace_avail_open, .read = seq_read, @@ -1527,38 +1418,6 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; -static struct file_operations ftraced_fops = { - .open = tracing_open_generic, - .read = ftraced_read, - .write = ftraced_write, -}; - -/** - * ftrace_force_update - force an update to all recording ftrace functions - */ -int ftrace_force_update(void) -{ - int ret = 0; - - if (unlikely(ftrace_disabled)) - return -ENODEV; - - mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - - /* - * If ftraced_trigger is not set, then there is nothing - * to update. - */ - if (ftraced_trigger && !ftrace_update_code()) - ret = -EBUSY; - - mutex_unlock(&ftraced_lock); - mutex_unlock(&ftrace_sysctl_lock); - - return ret; -} - static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; @@ -1589,17 +1448,11 @@ static __init int ftrace_init_debugfs(void) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); - entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer, - NULL, &ftraced_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ftraced_enabled' entry\n"); return 0; } fs_initcall(ftrace_init_debugfs); -#ifdef CONFIG_FTRACE_MCOUNT_RECORD static int ftrace_convert_nops(unsigned long *start, unsigned long *end) { @@ -1619,7 +1472,7 @@ static int ftrace_convert_nops(unsigned long *start, /* p is ignored */ local_irq_save(flags); - __ftrace_update_code(p); + ftrace_update_code(p); local_irq_restore(flags); return 0; @@ -1666,122 +1519,6 @@ void __init ftrace_init(void) failed: ftrace_disabled = 1; } -#else /* CONFIG_FTRACE_MCOUNT_RECORD */ - -static void ftrace_release_hash(unsigned long start, unsigned long end) -{ - struct dyn_ftrace *rec; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - unsigned long flags; - int i, cpu; - - preempt_disable_notrace(); - - /* disable incase we call something that calls mcount */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - - ftrace_hash_lock(flags); - - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; - - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(rec, t, n, head, node) { - if (rec->flags & FTRACE_FL_FREE) - continue; - - if ((rec->ip >= start) && (rec->ip < end)) - ftrace_free_rec(rec); - } - } - - ftrace_hash_unlock(flags); - - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; - preempt_enable_notrace(); - -} - -static int ftraced(void *ignore) -{ - unsigned long usecs; - - while (!kthread_should_stop()) { - - set_current_state(TASK_INTERRUPTIBLE); - - /* check once a second */ - schedule_timeout(HZ); - - if (unlikely(ftrace_disabled)) - continue; - - mutex_lock(&ftrace_sysctl_lock); - mutex_lock(&ftraced_lock); - if (!ftraced_suspend && !ftraced_stop && - ftrace_update_code()) { - usecs = nsecs_to_usecs(ftrace_update_time); - if (ftrace_update_tot_cnt > 100000) { - ftrace_update_tot_cnt = 0; - pr_info("hm, dftrace overflow: %lu change%s" - " (%lu total) in %lu usec%s\n", - ftrace_update_cnt, - ftrace_update_cnt != 1 ? "s" : "", - ftrace_update_tot_cnt, - usecs, usecs != 1 ? "s" : ""); - FTRACE_WARN_ON_ONCE(1); - } - } - mutex_unlock(&ftraced_lock); - mutex_unlock(&ftrace_sysctl_lock); - - ftrace_shutdown_replenish(); - } - __set_current_state(TASK_RUNNING); - return 0; -} - -static int __init ftrace_dynamic_init(void) -{ - struct task_struct *p; - unsigned long addr; - int ret; - - addr = (unsigned long)ftrace_record_ip; - - stop_machine(ftrace_dyn_arch_init, &addr, NULL); - - /* ftrace_dyn_arch_init places the return code in addr */ - if (addr) { - ret = (int)addr; - goto failed; - } - - ret = ftrace_dyn_table_alloc(NR_TO_INIT); - if (ret) - goto failed; - - p = kthread_run(ftraced, NULL, "ftraced"); - if (IS_ERR(p)) { - ret = -1; - goto failed; - } - - last_ftrace_enabled = ftrace_enabled = 1; - ftraced_task = p; - - return 0; - - failed: - ftrace_disabled = 1; - return ret; -} - -core_initcall(ftrace_dynamic_init); -#endif /* CONFIG_FTRACE_MCOUNT_RECORD */ #else # define ftrace_startup() do { } while (0) @@ -1801,9 +1538,6 @@ void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; -#ifdef CONFIG_DYNAMIC_FTRACE - ftraced_suspend = -1; -#endif clear_ftrace_function(); } diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 95815d26a04..90bc752a758 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, /* passed in by parameter to fool gcc from optimizing */ func(); - /* update the records */ - ret = ftrace_force_update(); - if (ret) { - printk(KERN_CONT ".. ftraced failed .. "); - return ret; - } - /* * Some archs *cough*PowerPC*cough* add charachters to the * start of the function names. We simply put a '*' to @@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* make sure msleep has been recorded */ msleep(1); - /* force the recorded functions to be traced */ - ret = ftrace_force_update(); - if (ret) { - printk(KERN_CONT ".. ftraced failed .. "); - return ret; - } - /* start the tracing */ ftrace_enabled = 1; tracer_enabled = 1; -- cgit v1.2.3-70-g09d2 From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:06 -0400 Subject: ftrace: remove mcount set The arch dependent function ftrace_mcount_set was only used by the daemon start up code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/kernel/ftrace.c | 13 ------------- arch/powerpc/kernel/ftrace.c | 17 ----------------- arch/sparc64/kernel/ftrace.c | 18 ------------------ arch/x86/kernel/ftrace.c | 7 ------- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 9 --------- 6 files changed, 65 deletions(-) (limited to 'kernel') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 76d50e6091b..6c90479e897 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -95,19 +95,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -int ftrace_mcount_set(unsigned long *data) -{ - unsigned long pc, old; - unsigned long *addr = data; - unsigned char *new; - - pc = (unsigned long)&mcount_call; - memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(pc, *addr); - *addr = ftrace_modify_code(pc, (unsigned char *)&old, new); - return 0; -} - /* run from kstop_machine */ int __init ftrace_dyn_arch_init(void *data) { diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 3855ceb937b..6b75522e8b3 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -126,23 +126,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { /* This is running in kstop_machine */ diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c index 4298d0aee71..447942041a7 100644 --- a/arch/sparc64/kernel/ftrace.c +++ b/arch/sparc64/kernel/ftrace.c @@ -69,24 +69,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(ip, old, new); } -notrace int ftrace_mcount_set(unsigned long *data) -{ - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - - return 0; -} - - int __init ftrace_dyn_arch_init(void *data) { ftrace_mcount_set(data); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index da4fb0deecf..b399eed2353 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -103,13 +103,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - /* mcount is initialized as a nop */ - *data = 0; - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { extern const unsigned char ftrace_test_p6nop[]; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ac58e94668b..1c4835f8691 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -71,7 +71,6 @@ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); -extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e758cab0836..226fd9132d5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -620,7 +620,6 @@ static int ftrace_update_code(void *ignore); static int __ftrace_modify_code(void *data) { - unsigned long addr; int *command = data; if (*command & FTRACE_ENABLE_CALLS) { @@ -639,14 +638,6 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); - if (*command & FTRACE_ENABLE_MCOUNT) { - addr = (unsigned long)ftrace_record_ip; - ftrace_mcount_set(&addr); - } else if (*command & FTRACE_DISABLE_MCOUNT) { - addr = (unsigned long)ftrace_stub; - ftrace_mcount_set(&addr); - } - return 0; } -- cgit v1.2.3-70-g09d2 From 08f5ac906d2c0faf96d608c54a0b03177376da8d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 09:33:07 -0400 Subject: ftrace: remove ftrace hash The ftrace hash was used by the ftrace_daemon code. The record ip function would place the calling address (ip) into the hash. The daemon would later read the hash and modify that code. The hash complicates the code. This patch removes it. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 8 +- kernel/trace/ftrace.c | 243 +++++++------------------------------------------ kernel/trace/trace.c | 3 - 3 files changed, 38 insertions(+), 216 deletions(-) (limited to 'kernel') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1c4835f8691..703eb53cfa2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,8 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_HASHBITS 10 -# define FTRACE_HASHSIZE (1< #include #include -#include #include #include @@ -189,9 +188,7 @@ static int ftrace_filtered; static int tracing_on; static int frozen_record_count; -static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; - -static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); +static LIST_HEAD(ftrace_new_addrs); static DEFINE_MUTEX(ftrace_regex_lock); @@ -210,8 +207,6 @@ struct ftrace_page { static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; -static int ftrace_record_suspend; - static struct dyn_ftrace *ftrace_free_records; @@ -242,72 +237,6 @@ static inline int record_frozen(struct dyn_ftrace *rec) # define record_frozen(rec) ({ 0; }) #endif /* CONFIG_KPROBES */ -int skip_trace(unsigned long ip) -{ - unsigned long fl; - struct dyn_ftrace *rec; - struct hlist_node *t; - struct hlist_head *head; - - if (frozen_record_count == 0) - return 0; - - head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)]; - hlist_for_each_entry_rcu(rec, t, head, node) { - if (rec->ip == ip) { - if (record_frozen(rec)) { - if (rec->flags & FTRACE_FL_FAILED) - return 1; - - if (!(rec->flags & FTRACE_FL_CONVERTED)) - return 1; - - if (!tracing_on || !ftrace_enabled) - return 1; - - if (ftrace_filtered) { - fl = rec->flags & (FTRACE_FL_FILTER | - FTRACE_FL_NOTRACE); - if (!fl || (fl & FTRACE_FL_NOTRACE)) - return 1; - } - } - break; - } - } - - return 0; -} - -static inline int -ftrace_ip_in_hash(unsigned long ip, unsigned long key) -{ - struct dyn_ftrace *p; - struct hlist_node *t; - int found = 0; - - hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { - if (p->ip == ip) { - found = 1; - break; - } - } - - return found; -} - -static inline void -ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) -{ - hlist_add_head_rcu(&node->node, &ftrace_hash[key]); -} - -/* called from kstop_machine */ -static inline void ftrace_del_hash(struct dyn_ftrace *node) -{ - hlist_del(&node->node); -} - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->ip = (unsigned long)ftrace_free_records; @@ -362,69 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) } if (ftrace_pages->index == ENTRIES_PER_PAGE) { - if (!ftrace_pages->next) - return NULL; + if (!ftrace_pages->next) { + /* allocate another page */ + ftrace_pages->next = + (void *)get_zeroed_page(GFP_KERNEL); + if (!ftrace_pages->next) + return NULL; + } ftrace_pages = ftrace_pages->next; } return &ftrace_pages->records[ftrace_pages->index++]; } -static void +static struct dyn_ftrace * ftrace_record_ip(unsigned long ip) { - struct dyn_ftrace *node; - unsigned long key; - int resched; - int cpu; + struct dyn_ftrace *rec; if (!ftrace_enabled || ftrace_disabled) - return; - - resched = need_resched(); - preempt_disable_notrace(); - - /* - * We simply need to protect against recursion. - * Use the the raw version of smp_processor_id and not - * __get_cpu_var which can call debug hooks that can - * cause a recursive crash here. - */ - cpu = raw_smp_processor_id(); - per_cpu(ftrace_shutdown_disable_cpu, cpu)++; - if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1) - goto out; - - if (unlikely(ftrace_record_suspend)) - goto out; - - key = hash_long(ip, FTRACE_HASHBITS); - - FTRACE_WARN_ON_ONCE(key >= FTRACE_HASHSIZE); - - if (ftrace_ip_in_hash(ip, key)) - goto out; - - /* This ip may have hit the hash before the lock */ - if (ftrace_ip_in_hash(ip, key)) - goto out; - - node = ftrace_alloc_dyn_node(ip); - if (!node) - goto out; + return NULL; - node->ip = ip; + rec = ftrace_alloc_dyn_node(ip); + if (!rec) + return NULL; - ftrace_add_hash(node, key); + rec->ip = ip; - out: - per_cpu(ftrace_shutdown_disable_cpu, cpu)--; + list_add(&rec->list, &ftrace_new_addrs); - /* prevent recursion with scheduler */ - if (resched) - preempt_enable_no_resched_notrace(); - else - preempt_enable_notrace(); + return rec; } #define FTRACE_ADDR ((long)(ftrace_caller)) @@ -543,7 +439,6 @@ static void ftrace_replace_code(int enable) rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { - ftrace_del_hash(rec); ftrace_free_rec(rec); } } @@ -551,15 +446,6 @@ static void ftrace_replace_code(int enable) } } -static void ftrace_shutdown_replenish(void) -{ - if (ftrace_pages->next) - return; - - /* allocate another page */ - ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); -} - static void print_ip_ins(const char *fmt, unsigned char *p) { int i; @@ -616,18 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec) return 1; } -static int ftrace_update_code(void *ignore); - static int __ftrace_modify_code(void *data) { int *command = data; if (*command & FTRACE_ENABLE_CALLS) { - /* - * Update any recorded ips now that we have the - * machine stopped - */ - ftrace_update_code(NULL); ftrace_replace_code(1); tracing_on = 1; } else if (*command & FTRACE_DISABLE_CALLS) { @@ -738,84 +617,34 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void *ignore) +static int ftrace_update_code(void) { - int i, save_ftrace_enabled; + struct dyn_ftrace *p, *t; cycle_t start, stop; - struct dyn_ftrace *p; - struct hlist_node *t, *n; - struct hlist_head *head, temp_list; - - /* Don't be recording funcs now */ - ftrace_record_suspend++; - save_ftrace_enabled = ftrace_enabled; - ftrace_enabled = 0; start = ftrace_now(raw_smp_processor_id()); ftrace_update_cnt = 0; - /* No locks needed, the machine is stopped! */ - for (i = 0; i < FTRACE_HASHSIZE; i++) { - INIT_HLIST_HEAD(&temp_list); - head = &ftrace_hash[i]; + list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { - /* all CPUS are stopped, we are safe to modify code */ - hlist_for_each_entry_safe(p, t, n, head, node) { - /* Skip over failed records which have not been - * freed. */ - if (p->flags & FTRACE_FL_FAILED) - continue; + /* If something went wrong, bail without enabling anything */ + if (unlikely(ftrace_disabled)) + return -1; - /* Unconverted records are always at the head of the - * hash bucket. Once we encounter a converted record, - * simply skip over to the next bucket. Saves ftraced - * some processor cycles (ftrace does its bid for - * global warming :-p ). */ - if (p->flags & (FTRACE_FL_CONVERTED)) - break; + list_del_init(&p->list); - /* Ignore updates to this record's mcount site. - * Reintroduce this record at the head of this - * bucket to attempt to "convert" it again if - * the kprobe on it is unregistered before the - * next run. */ - if (get_kprobe((void *)p->ip)) { - ftrace_del_hash(p); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, &temp_list); - freeze_record(p); - continue; - } else { - unfreeze_record(p); - } - - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else { - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(p->ip)) { - ftrace_del_hash(p); - ftrace_free_rec(p); - } - } - } - - hlist_for_each_entry_safe(p, t, n, &temp_list, node) { - hlist_del(&p->node); - INIT_HLIST_NODE(&p->node); - hlist_add_head(&p->node, head); - } + /* convert record (i.e, patch mcount-call with NOP) */ + if (ftrace_code_disable(p)) { + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + } else + ftrace_free_rec(p); } stop = ftrace_now(raw_smp_processor_id()); ftrace_update_time = stop - start; ftrace_update_tot_cnt += ftrace_update_cnt; - ftrace_enabled = save_ftrace_enabled; - ftrace_record_suspend--; - return 0; } @@ -847,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) pg = ftrace_pages = ftrace_pages_start; cnt = num_to_init / ENTRIES_PER_PAGE; - pr_info("ftrace: allocating %ld hash entries in %d pages\n", + pr_info("ftrace: allocating %ld entries in %d pages\n", num_to_init, cnt); for (i = 0; i < cnt; i++) { @@ -1451,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start, unsigned long addr; unsigned long flags; + mutex_lock(&ftrace_start_lock); p = start; while (p < end) { addr = ftrace_call_adjust(*p++); - /* should not be called from interrupt context */ - spin_lock(&ftrace_lock); ftrace_record_ip(addr); - spin_unlock(&ftrace_lock); - ftrace_shutdown_replenish(); } - /* p is ignored */ + /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(p); + ftrace_update_code(); local_irq_restore(flags); + mutex_unlock(&ftrace_start_lock); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 333a5162149..06951e22944 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -865,9 +865,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (unlikely(!ftrace_function_enabled)) return; - if (skip_trace(ip)) - return; - pc = preempt_count(); resched = need_resched(); preempt_disable_notrace(); -- cgit v1.2.3-70-g09d2 From 66b0de3569b00f61978782b9f97aa4803dbec0fb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Oct 2008 16:11:03 +0200 Subject: ftrace: fix build failure fix: kernel/trace/ftrace.c: In function 'ftrace_release': kernel/trace/ftrace.c:271: error: implicit declaration of function 'ftrace_release_hash' release_hash is not needed without dftraced. Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 07762c08a94..27212321eb0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -267,8 +267,6 @@ void ftrace_release(void *start, unsigned long size) } } spin_unlock(&ftrace_lock); - - ftrace_release_hash(s, e); } static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) -- cgit v1.2.3-70-g09d2 From acff181d3574244e651913df77332e897b88bff4 Mon Sep 17 00:00:00 2001 From: roel kluin Date: Tue, 21 Oct 2008 19:49:09 -0400 Subject: printk: remove unused code from kernel/printk.c both log_buf_copy() and log_buf_len are unused. Signed-off-by: Ingo Molnar --- kernel/printk.c | 39 --------------------------------------- 1 file changed, 39 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 6341af77eb6..f492f1583d7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -232,45 +232,6 @@ static inline void boot_delay_msec(void) } #endif -/* - * Return the number of unread characters in the log buffer. - */ -static int log_buf_get_len(void) -{ - return logged_chars; -} - -/* - * Copy a range of characters from the log buffer. - */ -int log_buf_copy(char *dest, int idx, int len) -{ - int ret, max; - bool took_lock = false; - - if (!oops_in_progress) { - spin_lock_irq(&logbuf_lock); - took_lock = true; - } - - max = log_buf_get_len(); - if (idx < 0 || idx >= max) { - ret = -1; - } else { - if (len > max) - len = max; - ret = len; - idx += (log_end - max); - while (len-- > 0) - dest[len] = LOG_BUF(idx + len); - } - - if (took_lock) - spin_unlock_irq(&logbuf_lock); - - return ret; -} - /* * Commands to do_syslog: * -- cgit v1.2.3-70-g09d2 From bea92112415635ecb7e681355834413c7c048f67 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 22 Oct 2008 19:31:11 +0900 Subject: kernel/resource: fix reserve_region_with_split() section mismatch Impact: cleanup, small kernel text size reduction, no functionality changed reserve_region_with_split() calls in to __reserve_region_with_split(), which is an __init function. The only caller of reserve_region_with_split() is an __init function, so make it __init too. Signed-off-by: Paul Mundt Signed-off-by: Ingo Molnar --- kernel/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 4089d12af6e..7fec0e42723 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -571,7 +571,7 @@ static void __init __reserve_region_with_split(struct resource *root, } -void reserve_region_with_split(struct resource *root, +void __init reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name) { -- cgit v1.2.3-70-g09d2 From 01c8c57d668d94f1036d9ab11a22aa24ca16a35d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Oct 2008 11:06:12 +0200 Subject: sched: fix a find_busiest_group buglet In one of the group load balancer patches: commit 408ed066b11cf9ee4536573b4269ee3613bd735e Author: Peter Zijlstra Date: Fri Jun 27 13:41:28 2008 +0200 Subject: sched: hierarchical load vs find_busiest_group The following change: - if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >= + if (max_load - this_load + 2*busiest_load_per_task >= busiest_load_per_task * imbn) { made the condition always true, because imbn is [1,2]. Therefore, remove the 2*, and give the it a fair chance. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 6625c3c4b10..12bc367d924 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3344,7 +3344,7 @@ small_imbalance: } else this_load_per_task = cpu_avg_load_per_task(this_cpu); - if (max_load - this_load + 2*busiest_load_per_task >= + if (max_load - this_load + busiest_load_per_task >= busiest_load_per_task * imbn) { *imbalance = busiest_load_per_task; return busiest; -- cgit v1.2.3-70-g09d2 From 1af5f730fc1bf7c62ec9fb2d307206e18bf40a69 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Oct 2008 11:06:13 +0200 Subject: sched: more accurate min_vruntime accounting Mike noticed the current min_vruntime tracking can go wrong and skip the current task. If the only remaining task in the tree is a nice 19 task with huge vruntime, new tasks will be inserted too far to the right too, causing some interactibity issues. min_vruntime can only change due to the leftmost entry disappearing (dequeue_entity()), or by the leftmost entry being incremented past the next entry, which elects a new leftmost (__update_curr()) Due to the current entry not being part of the actual tree, we have to compare the leftmost tree entry with the current entry, and take the leftmost of these two. So create a update_min_vruntime() function that takes computes the leftmost vruntime in the system (either tree of current) and increases the cfs_rq->min_vruntime if the computed value is larger than the previously found min_vruntime. And call this from the two sites we've identified that can change min_vruntime. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 42d211f08c9..b27ccc52f6a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -223,6 +223,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) return se->vruntime - cfs_rq->min_vruntime; } +static void update_min_vruntime(struct cfs_rq *cfs_rq) +{ + u64 vruntime = cfs_rq->min_vruntime; + + if (cfs_rq->curr) + vruntime = cfs_rq->curr->vruntime; + + if (cfs_rq->rb_leftmost) { + struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, + struct sched_entity, + run_node); + + if (vruntime == cfs_rq->min_vruntime) + vruntime = se->vruntime; + else + vruntime = min_vruntime(vruntime, se->vruntime); + } + + cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); +} + /* * Enqueue an entity into the rb-tree: */ @@ -256,15 +277,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) * Maintain a cache of leftmost tree entries (it is frequently * used): */ - if (leftmost) { + if (leftmost) cfs_rq->rb_leftmost = &se->run_node; - /* - * maintain cfs_rq->min_vruntime to be a monotonic increasing - * value tracking the leftmost vruntime in the tree. - */ - cfs_rq->min_vruntime = - max_vruntime(cfs_rq->min_vruntime, se->vruntime); - } rb_link_node(&se->run_node, parent, link); rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); @@ -274,18 +288,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { if (cfs_rq->rb_leftmost == &se->run_node) { struct rb_node *next_node; - struct sched_entity *next; next_node = rb_next(&se->run_node); cfs_rq->rb_leftmost = next_node; - - if (next_node) { - next = rb_entry(next_node, - struct sched_entity, run_node); - cfs_rq->min_vruntime = - max_vruntime(cfs_rq->min_vruntime, - next->vruntime); - } } if (cfs_rq->next == se) @@ -424,6 +429,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, schedstat_add(cfs_rq, exec_clock, delta_exec); delta_exec_weighted = calc_delta_fair(delta_exec, curr); curr->vruntime += delta_exec_weighted; + update_min_vruntime(cfs_rq); } static void update_curr(struct cfs_rq *cfs_rq) @@ -613,13 +619,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { - u64 vruntime; - - if (first_fair(cfs_rq)) { - vruntime = min_vruntime(cfs_rq->min_vruntime, - __pick_next_entity(cfs_rq)->vruntime); - } else - vruntime = cfs_rq->min_vruntime; + u64 vruntime = cfs_rq->min_vruntime; /* * The 'current' period is already promised to the current tasks, @@ -696,6 +696,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); + update_min_vruntime(cfs_rq); } /* -- cgit v1.2.3-70-g09d2 From 0d13033bc9257fe65c1aa25e84568b1608da0901 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 24 Oct 2008 11:06:14 +0200 Subject: sched: weaken sync hint Mysql+oltp and pgsql+oltp peaks are still shifted right. The below puts the peaks back to 1 client/server pair per core. Use the avg_overlap information to weaken the sync hint. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b27ccc52f6a..b71ee2c6229 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1123,10 +1123,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) return 0; - if (!sync && sched_feat(SYNC_WAKEUPS) && - curr->se.avg_overlap < sysctl_sched_migration_cost && - p->se.avg_overlap < sysctl_sched_migration_cost) - sync = 1; + if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || + p->se.avg_overlap > sysctl_sched_migration_cost)) + sync = 0; /* * If sync wakeup then subtract the (maximum possible) -- cgit v1.2.3-70-g09d2 From 464b75273f64be7c81fee975bd6ca9593df3427b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Oct 2008 11:06:15 +0200 Subject: sched: re-instate vruntime based wakeup preemption The advantage is that vruntime based wakeup preemption has a better conceptual model. Here wakeup_gran = 0 means: preempt when 'fair'. Therefore wakeup_gran is the granularity of unfairness we allow in order to make progress. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b71ee2c6229..7af17e04a6d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return se->parent; } +/* return depth at which a sched entity is present in the hierarchy */ +static inline int depth_se(struct sched_entity *se) +{ + int depth = 0; + + for_each_sched_entity(se) + depth++; + + return depth; +} + +static void +find_matching_se(struct sched_entity **se, struct sched_entity **pse) +{ + int se_depth, pse_depth; + + /* + * preemption test can be made between sibling entities who are in the + * same cfs_rq i.e who have a common parent. Walk up the hierarchy of + * both tasks until we find their ancestors who are siblings of common + * parent. + */ + + /* First walk up until both entities are at same depth */ + se_depth = depth_se(*se); + pse_depth = depth_se(*pse); + + while (se_depth > pse_depth) { + se_depth--; + *se = parent_entity(*se); + } + + while (pse_depth > se_depth) { + pse_depth--; + *pse = parent_entity(*pse); + } + + while (!is_same_group(*se, *pse)) { + *se = parent_entity(*se); + *pse = parent_entity(*pse); + } +} + #else /* CONFIG_FAIR_GROUP_SCHED */ static inline struct rq *rq_of(struct cfs_rq *cfs_rq) @@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return NULL; } +static inline void +find_matching_se(struct sched_entity **se, struct sched_entity **pse) +{ +} + #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -1244,12 +1292,41 @@ static unsigned long wakeup_gran(struct sched_entity *se) * More easily preempt - nice tasks, while not making it harder for * + nice tasks. */ - if (sched_feat(ASYM_GRAN)) - gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); + if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) + gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); return gran; } +/* + * Should 'se' preempt 'curr'. + * + * |s1 + * |s2 + * |s3 + * g + * |<--->|c + * + * w(c, s1) = -1 + * w(c, s2) = 0 + * w(c, s3) = 1 + * + */ +static int +wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) +{ + s64 gran, vdiff = curr->vruntime - se->vruntime; + + if (vdiff <= 0) + return -1; + + gran = wakeup_gran(curr); + if (vdiff > gran) + return 1; + + return 0; +} + /* * Preempt the current task with a newly woken task if needed: */ @@ -1258,7 +1335,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) struct task_struct *curr = rq->curr; struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; - s64 delta_exec; if (unlikely(rt_prio(p->prio))) { update_rq_clock(rq); @@ -1296,9 +1372,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; } - delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; - if (delta_exec > wakeup_gran(pse)) - resched_task(curr); + find_matching_se(&se, &pse); + + while (se) { + BUG_ON(!pse); + + if (wakeup_preempt_entity(se, pse) == 1) { + resched_task(curr); + break; + } + + se = parent_entity(se); + pse = parent_entity(pse); + } } static struct task_struct *pick_next_task_fair(struct rq *rq) -- cgit v1.2.3-70-g09d2 From 3f3a490480d8ab96e0fe30a41f80f14e6a0c579d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Oct 2008 11:06:16 +0200 Subject: sched: virtual time buddy preemption Since we moved wakeup preemption back to virtual time, it makes sense to move the buddy stuff back as well. The purpose of the buddy scheduling is to allow a quickly scheduling pair of tasks to run away from the group as far as a regular busy task would be allowed under wakeup preemption. This has the advantage that the pair can ping-pong for a while, enjoying cache-hotness. Without buddy scheduling other tasks would interleave destroying the cache. Also, it saves a word in cfs_rq. Signed-off-by: Peter Zijlstra Acked-by: Mike Galbraith Signed-off-by: Ingo Molnar --- kernel/sched.c | 1 - kernel/sched_fair.c | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 12bc367d924..e8819bc6f46 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -386,7 +386,6 @@ struct cfs_rq { u64 exec_clock; u64 min_vruntime; - u64 pair_start; struct rb_root tasks_timeline; struct rb_node *rb_leftmost; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 7af17e04a6d..ce514afd78f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -791,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) se->prev_sum_exec_runtime = se->sum_exec_runtime; } +static int +wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); + static struct sched_entity * pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct rq *rq = rq_of(cfs_rq); - u64 pair_slice = rq->clock - cfs_rq->pair_start; - - if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { - cfs_rq->pair_start = rq->clock; + if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) return se; - } return cfs_rq->next; } -- cgit v1.2.3-70-g09d2 From f17845e5d97ead8fbdadfd40039e058ec7cf4a42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Oct 2008 12:47:10 +0200 Subject: ftrace: warning in kernel/trace/ftrace.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this warning: kernel/trace/ftrace.c:189: warning: ‘frozen_record_count’ defined but not used triggers because frozen_record_count is only used in the KCONFIG_MARKERS case. Move the variable it there. Alas, this frozen-record facility seems to have little use. The frozen_record_count variable is not used by anything, nor the flags. So this section might need a bit of dead-code-removal care as well. Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 27212321eb0..7618c528756 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -186,7 +186,6 @@ enum { static int ftrace_filtered; static int tracing_on; -static int frozen_record_count; static LIST_HEAD(ftrace_new_addrs); @@ -211,6 +210,9 @@ static struct dyn_ftrace *ftrace_free_records; #ifdef CONFIG_KPROBES + +static int frozen_record_count; + static inline void freeze_record(struct dyn_ftrace *rec) { if (!(rec->flags & FTRACE_FL_FROZEN)) { @@ -1443,3 +1445,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_sysctl_lock); return ret; } + -- cgit v1.2.3-70-g09d2 From e2862c9470beb842d3f1c1965b03a2112114c160 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 27 Oct 2008 17:43:28 +1100 Subject: trace: fix printk warning for u64 A powerpc ppc64_defconfig build produces these warnings: kernel/trace/ring_buffer.c: In function 'rb_add_time_stamp': kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 2 has type 'u64' kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'u64' kernel/trace/ring_buffer.c:969: warning: format '%llu' expects type 'long long unsigned int', but argument 4 has type 'u64' Just cast the u64s to unsigned long long like we do everywhere else. Signed-off-by: Stephen Rothwell Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 091aeefe321..cedf4e26828 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(*delta > (1ULL << 59) && !once++)) { printk(KERN_WARNING "Delta way too big! %llu" " ts=%llu write stamp = %llu\n", - *delta, *ts, cpu_buffer->write_stamp); + (unsigned long long)*delta, + (unsigned long long)*ts, + (unsigned long long)cpu_buffer->write_stamp); WARN_ON(1); } -- cgit v1.2.3-70-g09d2 From f66af459a931f25807e1df7915b2b66bb5978d82 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Oct 2008 19:14:55 +0200 Subject: tracepoint: check if the probe has been registered Impact: fix kernel crash that can trigger during tracing If we try to remove a probe that has not been already registered, the tracepoint_entry_remove_probe() function will dereference a NULL pointer. Check the probe before removing it to avoid crashes. Signed-off-by: Frederic Weisbecker Acked-by: Mathieu Desnoyers Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/tracepoint.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f2b7c28a470..af8c8566488 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -131,6 +131,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) old = entry->funcs; + if (!old) + return NULL; + debug_print_probes(entry); /* (N -> M), (N > 1, M >= 0) probes */ for (nr_probes = 0; old[nr_probes]; nr_probes++) { @@ -388,6 +391,11 @@ int tracepoint_probe_unregister(const char *name, void *probe) if (entry->rcu_pending) rcu_barrier_sched(); old = tracepoint_entry_remove_probe(entry, probe); + if (!old) { + printk(KERN_WARNING "Warning: Trying to unregister a probe" + "that doesn't exist\n"); + goto end; + } mutex_unlock(&tracepoints_mutex); tracepoint_update_probes(); /* may update entry */ mutex_lock(&tracepoints_mutex); -- cgit v1.2.3-70-g09d2 From ea31e72d753e5817a97de552f152d0cb55c7defc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Oct 2008 19:26:23 +0200 Subject: tracing/ftrace: make boot tracer select the sched_switch tracer Impact: build fix If the boot tracer is selected but not the sched_switch, there will be a build failure: kernel/built-in.o: In function `boot_trace_init': trace_boot.c:(.text+0x5ee38): undefined reference to `sched_switch_trace' kernel/built-in.o: In function `disable_boot_trace': (.text+0x5eee1): undefined reference to `tracing_stop_cmdline_record' kernel/built-in.o: In function `enable_boot_trace': (.text+0x5ef11): undefined reference to `tracing_start_cmdline_record' This patch fixes it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index bc535cb91de..e0cea282e0c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -123,6 +123,7 @@ config BOOT_TRACER bool "Trace boot initcalls" depends on DEBUG_KERNEL select TRACING + select CONTEXT_SWITCH_TRACER help This tracer helps developers to optimize boot times: it records the timings of the initcalls and traces key events and the identity -- cgit v1.2.3-70-g09d2 From 21798a84ab383cdac0e7ee3368e0792b718b867d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 28 Oct 2008 09:43:26 +0100 Subject: tracing: fix a build error on alpha Impact: build fix on Alpha When tracing is enabled, some arch have included on their but others like alpha or m68k don't. Build error on alpha: kernel/trace/trace.c: In function 'tracing_cpumask_write': kernel/trace/trace.c:2145: error: implicit declaration of function 'raw_local_irq_disable' kernel/trace/trace.c:2162: error: implicit declaration of function 'raw_local_irq_enable' Tested on Alpha through a cross-compiler (should correct a similar issue on m68k). Reported-by: Alexey Dobriyan Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06951e22944..bc577dcc0e4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -34,6 +34,7 @@ #include #include +#include #include "trace.h" -- cgit v1.2.3-70-g09d2 From 46fec7ac40e452a2ea5e63648d98b6bb2b5898f9 Mon Sep 17 00:00:00 2001 From: qinghuang feng Date: Tue, 28 Oct 2008 17:24:28 +0800 Subject: lockdep: minor fix for debug_show_all_locks() When we failed to get tasklist_lock eventually (count equals 0), we should only print " ignoring it.\n", and not print " locked it.\n" needlessly. Signed-off-by: Qinghuang Feng Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index dbda475b13b..11832acdde7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3417,9 +3417,10 @@ retry: } printk(" ignoring it.\n"); unlock = 0; + } else { + if (count != 10) + printk(KERN_CONT " locked it.\n"); } - if (count != 10) - printk(" locked it.\n"); do_each_thread(g, p) { /* -- cgit v1.2.3-70-g09d2 From 6afe40b4dace385d7ba2faf24b352f066f3b71bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Oct 2008 11:14:58 +0100 Subject: lockdep: fix irqs on/off ip tracing Impact: fix lockdep lock-api-caller output when irqsoff tracing is enabled 81d68a96 "ftrace: trace irq disabled critical timings" added wrappers around trace_hardirqs_on/off_caller. However these functions use __builtin_return_address(0) to figure out which function actually disabled or enabled irqs. The result is that we save the ips of trace_hardirqs_on/off instead of the real caller. Not very helpful. However since the patch from Steven the ip already gets passed. So use that and get rid of __builtin_return_address(0) in these two functions. Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 11832acdde7..06e157119d2 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2169,12 +2169,11 @@ void early_boot_irqs_on(void) /* * Hardirqs will be enabled: */ -void trace_hardirqs_on_caller(unsigned long a0) +void trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; - unsigned long ip; - time_hardirqs_on(CALLER_ADDR0, a0); + time_hardirqs_on(CALLER_ADDR0, ip); if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2188,7 +2187,6 @@ void trace_hardirqs_on_caller(unsigned long a0) } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; - ip = (unsigned long) __builtin_return_address(0); if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; @@ -2224,11 +2222,11 @@ EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void trace_hardirqs_off_caller(unsigned long a0) +void trace_hardirqs_off_caller(unsigned long ip) { struct task_struct *curr = current; - time_hardirqs_off(CALLER_ADDR0, a0); + time_hardirqs_off(CALLER_ADDR0, ip); if (unlikely(!debug_locks || current->lockdep_recursion)) return; @@ -2241,7 +2239,7 @@ void trace_hardirqs_off_caller(unsigned long a0) * We have done an ON -> OFF transition: */ curr->hardirqs_enabled = 0; - curr->hardirq_disable_ip = _RET_IP_; + curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(&hardirqs_off_events); } else -- cgit v1.2.3-70-g09d2 From 60063a66236c15f5613f91390631e06718689782 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Oct 2008 10:44:24 -0400 Subject: ftrace: fix current_tracer error return The commit (in linux-tip) c2931e05ec5965597cbfb79ad332d4a29aeceb23 ( ftrace: return an error when setting a nonexistent tracer ) added useful code that would error when a bad tracer was written into the current_tracer file. But this had a bug if the amount written was more than the amount read by that code. The first iteration would set the tracer correctly, but since it did not consume the rest of what was written (usually whitespace), the userspace utility would continue to write what was not consumed. This second iteration would fail to find a tracer and return -EINVAL. Funny thing is that the tracer would have already been set. This patch just consumes all the data that is written to the file. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc577dcc0e4..a610ca77155 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2377,9 +2377,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, int i; size_t ret; + ret = cnt; + if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; - ret = cnt; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -2412,8 +2413,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); - if (ret == cnt) - filp->f_pos += cnt; + if (ret > 0) + filp->f_pos += ret; return ret; } -- cgit v1.2.3-70-g09d2 From 0b6e4d56bf71866a2b58daa8323cf747988ce7e4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 28 Oct 2008 20:17:38 +0100 Subject: ftrace: perform an initialization for ftrace to enable it Impact: corrects a bug which made the non-dyn function tracer not functional With latest git, the non-dynamic function tracer didn't get any trace. The problem was the fact that ftrace_enabled wasn't initialized to 1 because ftrace hasn't any init function when DYNAMIC_FTRACE is disabled. So when a tracer tries to register an ftrace_ops struct, __register_ftrace_function failed to set the hook. This patch corrects it by setting an init function to initialize ftrace during the boot. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7618c528756..4a39d24568c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1339,6 +1339,14 @@ void __init ftrace_init(void) } #else + +static int __init ftrace_nodyn_init(void) +{ + ftrace_enabled = 1; + return 0; +} +device_initcall(ftrace_nodyn_init); + # define ftrace_startup() do { } while (0) # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) -- cgit v1.2.3-70-g09d2 From f3384b28a05624783b53836ccfed95ecde66a7ad Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Oct 2008 11:15:57 -0400 Subject: ftrace: fix trace_nop config select Impact: build fix on non-function-tracing architectures The trace_nop is the tracer that is defined when no tracer is set in the ftrace infrastructure. The trace_nop was mistakenly selected by HAVE_FTRACE due to the confusion between ftrace infrastructure and the ftrace function tracer (which has been solved by renaming the function tracer). This patch changes the select to the approriate TRACING. This patch should fix compile errors on architectures that do not define the FUNCTION_TRACER. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e0cea282e0c..b58f43bec36 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -8,7 +8,6 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool - select NOP_TRACER config HAVE_DYNAMIC_FTRACE bool @@ -28,6 +27,7 @@ config TRACING select RING_BUFFER select STACKTRACE select TRACEPOINTS + select NOP_TRACER menu "Tracers" -- cgit v1.2.3-70-g09d2 From 7ccb97437bcc818d0ba6067513475f6ee8177a15 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:51 -0700 Subject: freezer_cg: fix improper BUG_ON() causing oops The BUG_ON() should be protected by freezer->lock, otherwise it can be triggered easily when a task has been unfreezed but the corresponding cgroup hasn't been changed to FROZEN state. Signed-off-by: Li Zefan Acked-by: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e9505695449..7f54d1c4295 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -190,8 +190,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) freezer = task_freezer(task); task_unlock(task); - BUG_ON(freezer->state == CGROUP_FROZEN); spin_lock_irq(&freezer->lock); + BUG_ON(freezer->state == CGROUP_FROZEN); + /* Locking avoids race with FREEZING -> THAWED transitions. */ if (freezer->state == CGROUP_FREEZING) freeze_task(task, true); -- cgit v1.2.3-70-g09d2 From 80a6a2cf3bebcf20285cf05373b9c5ec96816577 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:52 -0700 Subject: freezer_cg: remove redundant check in freezer_can_attach() It is sufficient to check if @task is frozen, and no need to check if the original freezer is frozen. Signed-off-by: Li Zefan Acked-by: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 7f54d1c4295..e9c856a265c 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss, struct task_struct *task) { struct freezer *freezer; - int retval; - /* Anything frozen can't move or be moved to/from */ + /* + * Anything frozen can't move or be moved to/from. + * + * Since orig_freezer->state == FROZEN means that @task has been + * frozen, so it's sufficient to check the latter condition. + */ if (is_task_frozen_enough(task)) return -EBUSY; @@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, if (freezer->state == CGROUP_FROZEN) return -EBUSY; - retval = 0; - task_lock(task); - freezer = task_freezer(task); - if (freezer->state == CGROUP_FROZEN) - retval = -EBUSY; - task_unlock(task); - return retval; + return 0; } static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) -- cgit v1.2.3-70-g09d2 From 00c2e63c31d0f431952ff2a671c5c6997dd4f8b2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:53 -0700 Subject: freezer_cg: use thaw_process() in unfreeze_cgroup() Don't duplicate the implementation of thaw_process(). [akpm@linux-foundation.org: make __thaw_process() static] Signed-off-by: Li Zefan Cc: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 5 ----- kernel/cgroup_freezer.c | 15 ++++----------- kernel/freezer.c | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8f225339eee..5a361f85cfe 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -44,11 +44,6 @@ static inline bool should_send_signal(struct task_struct *p) return !(p->flags & PF_FREEZER_NOSIG); } -/* - * Wake up a frozen process - */ -extern int __thaw_process(struct task_struct *p); - /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e9c856a265c..5e6d26b66e8 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -275,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) return num_cant_freeze_now ? -EBUSY : 0; } -static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) { struct cgroup_iter it; struct task_struct *task; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - int do_wake; - - task_lock(task); - do_wake = __thaw_process(task); - task_unlock(task); - if (do_wake) - wake_up_process(task); + thaw_process(task); } cgroup_iter_end(cgroup, &it); - freezer->state = CGROUP_THAWED; - return 0; + freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -320,7 +313,7 @@ static int freezer_change_state(struct cgroup *cgroup, } /* state == FREEZING and goal_state == THAWED, so unfreeze */ case CGROUP_FROZEN: - retval = unfreeze_cgroup(cgroup, freezer); + unfreeze_cgroup(cgroup, freezer); break; default: break; diff --git a/kernel/freezer.c b/kernel/freezer.c index ba6248b323e..2f4936cf708 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p) } } -/* - * Wake up a frozen process - * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. - */ -int __thaw_process(struct task_struct *p) +static int __thaw_process(struct task_struct *p) { if (frozen(p)) { p->flags &= ~PF_FROZEN; @@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p) return 0; } +/* + * Wake up a frozen process + * + * task_lock() is needed to prevent the race with refrigerator() which may + * occur if the freezing of tasks fails. Namely, without the lock, if the + * freezing of tasks failed, thaw_tasks() might have run before a task in + * refrigerator() could call frozen_process(), in which case the task would be + * frozen and no one would thaw it. + */ int thaw_process(struct task_struct *p) { task_lock(p); -- cgit v1.2.3-70-g09d2 From 51308ee59dee1136ed599d875ea8968d7be55c91 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Oct 2008 14:00:54 -0700 Subject: freezer_cg: simplify freezer_change_state() Just call unfreeze_cgroup() if goal_state == THAWED, and call try_to_freeze_cgroup() if goal_state == FROZEN. No behavior has been changed. Signed-off-by: Li Zefan Acked-by: Cedric Le Goater Acked-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup_freezer.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 5e6d26b66e8..7fa476f01d0 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -296,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup, int retval = 0; freezer = cgroup_freezer(cgroup); + spin_lock_irq(&freezer->lock); + update_freezer_state(cgroup, freezer); if (goal_state == freezer->state) goto out; - switch (freezer->state) { + + switch (goal_state) { case CGROUP_THAWED: - retval = try_to_freeze_cgroup(cgroup, freezer); + unfreeze_cgroup(cgroup, freezer); break; - case CGROUP_FREEZING: - if (goal_state == CGROUP_FROZEN) { - /* Userspace is retrying after - * "/bin/echo FROZEN > freezer.state" returned -EBUSY */ - retval = try_to_freeze_cgroup(cgroup, freezer); - break; - } - /* state == FREEZING and goal_state == THAWED, so unfreeze */ case CGROUP_FROZEN: - unfreeze_cgroup(cgroup, freezer); + retval = try_to_freeze_cgroup(cgroup, freezer); break; default: - break; + BUG(); } out: spin_unlock_irq(&freezer->lock); -- cgit v1.2.3-70-g09d2 From ce05fcc30ea41c85f9d50bee1ce289f7cb7fb223 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 29 Oct 2008 14:01:07 -0700 Subject: kernel/profile: fix profile_init() section mismatch profile_init() calls in to alloc_bootmem() on early initialization. While alloc_bootmem() is __init, the reference itself is safe in that it is tucked below a !slab_is_available() check. So, flag profile_init() as __ref. Signed-off-by: Paul Mundt Cc: Dave Hansen Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/profile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/profile.c b/kernel/profile.c index a9e422df6bf..9830a037d8d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -102,7 +102,7 @@ int profile_setup(char *str) __setup("profile=", profile_setup); -int profile_init(void) +int __ref profile_init(void) { int buffer_bytes; if (!prof_on) -- cgit v1.2.3-70-g09d2 From d25141a818383b3c3b09f065698c544a7a0ec6e7 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 29 Oct 2008 14:01:11 -0700 Subject: 'kill sig -1' must only apply to caller's namespace Currently "kill -1" kills processes in all namespaces and breaks the isolation of namespaces. Earlier attempt to fix this was discussed at: http://lkml.org/lkml/2008/7/23/148 As suggested by Oleg Nesterov in that thread, use "task_pid_vnr() > 1" check since task_pid_vnr() returns 0 if process is outside the caller's namespace. Signed-off-by: Sukadev Bhattiprolu Acked-by: Eric W. Biederman Tested-by: Daniel Hokka Zakrisson Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 105217da5c8..4530fc65445 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) struct task_struct * p; for_each_process(p) { - if (p->pid > 1 && !same_thread_group(p, current)) { + if (task_pid_vnr(p) > 1 && + !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p); ++count; if (err != -EPERM) -- cgit v1.2.3-70-g09d2