From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Dec 2008 20:12:29 +0100 Subject: performance counters: core code Implement the core kernel bits of the Performance Counters subsystem. The Linux Performance Counter subsystem provides an abstraction of performance counter hardware capabilities. It provides per-task and per-CPU counters, and it provides event capabilities on top of those. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_counter_open() system call: int perf_counter_open(u32 hw_event_type, u32 hw_event_period, u32 record_type, pid_t pid, int cpu); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. See more details in Documentation/perf-counters.txt. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0e206..441fadff1fa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -975,6 +975,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; rt_mutex_init_task(p); + perf_counter_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); -- cgit v1.2.3-70-g09d2 From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:48 +0200 Subject: x86, ptrace: add bts context unconditionally Add the ptrace bts context field to task_struct unconditionally. Initialize the field directly in copy_process(). Remove all the unneeded functionality used to initialize that field.
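Referring back to the perf_counter_open() interface described in the first patch above, a minimal user-space sketch of counting events over a workload might look as follows. This is illustrative only: the syscall number is a placeholder, hw_event_type 0 is assumed to select the first hardware event, pid 0 is assumed to mean the current task, and cpu -1 any CPU (see Documentation/perf-counters.txt for the real conventions).

    /* Hypothetical usage sketch -- not part of the patch. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_perf_counter_open
    # define __NR_perf_counter_open 333   /* placeholder, arch-specific */
    #endif

    int main(void)
    {
            unsigned long long count;
            /* hw_event_type=0, no sampling period, default record type,
             * pid=0 (current task), cpu=-1 (any CPU) */
            int fd = syscall(__NR_perf_counter_open, 0, 0, 0, 0, -1);

            if (fd < 0)
                    return 1;
            /* ... run the workload to be measured ... */
            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("events: %llu\n", count);
            close(fd);
            return 0;
    }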
Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144603.292754000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 9 ++++----- arch/x86/kernel/ptrace.c | 20 +------------------- include/linux/ptrace.h | 10 ---------- include/linux/sched.h | 2 -- kernel/fork.c | 4 ++-- kernel/ptrace.c | 10 ---------- 6 files changed, 7 insertions(+), 48 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index e304b66abee..5cdd19f20b5 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -extern void x86_ptrace_untrace(struct task_struct *); -extern void x86_ptrace_fork(struct task_struct *child, - unsigned long clone_flags); +#ifdef CONFIG_X86_PTRACE_BTS +extern void ptrace_bts_untrace(struct task_struct *tsk); -#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) -#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) +#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) +#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index adbb24322d8..b32a8ee5338 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -887,37 +887,19 @@ static int ptrace_bts_size(struct task_struct *child) return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static inline void ptrace_bts_fork(struct task_struct *tsk) -{ - tsk->bts = NULL; -} - /* * Called from __ptrace_unlink() after the child has been moved back * to its original parent. */ -static inline void ptrace_bts_untrace(struct task_struct *child) +void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { free_bts_context(child->bts); child->bts = NULL; } } -#else -static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ -void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - ptrace_bts_fork(child); -} - -void x86_ptrace_untrace(struct task_struct *child) -{ - ptrace_bts_untrace(child); -} - /* * Called by kernel/ptrace.c when detaching.. * diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc2..59e133d39d5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). 
- */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/sched.h b/include/linux/sched.h index 52b8cd049c2..451186a22ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,13 +1205,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ struct bts_context *bts; -#endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; diff --git a/kernel/fork.c b/kernel/fork.c index 660c2b8765b..69bde7a22e9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1086,8 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(current->ptrace)) - ptrace_fork(p, clone_flags); + + p->bts = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec3419..321127d965c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -26,16 +26,6 @@ #include -/* - * Initialize a new task whose father had been ptraced. - * - * Called from copy_process(). - */ -void ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - arch_ptrace_fork(child, clone_flags); -} - /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. -- cgit v1.2.3-70-g09d2 From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 09:36:00 -0400 Subject: tracing: create automated trace defines This patch lowers the number of places a developer must modify to add new tracepoints. The current method to add a new tracepoint into an existing system is to write the trace point macro in the trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or DECLARE_TRACE, then add the same named item into the C file with the macro DEFINE_TRACE(name), and then add the trace point. This change removes the need to add the DEFINE_TRACE(name). Every file that uses the tracepoint must still include the trace/<type>.h file, but the one C file must also add a define before the including of that file. #define CREATE_TRACE_POINTS #include <trace/mytrace.h> This will cause the trace/mytrace.h file to also produce the C code necessary to implement the trace point. Note, if more than one trace/<type>.h is used to create the C code it is best to list them all together. #define CREATE_TRACE_POINTS #include <trace/foo.h> #include <trace/bar.h> #include <trace/fido.h> Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with the cleaner solution of the define above the includes over my first design to have the C code include a "special" header. This patch converts sched, irq, lockdep and skb to use this new method.
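To make the scheme concrete, a minimal trace header written against it might look like the following sketch. The header name, TRACE_SYSTEM value and event are invented for illustration; the TRACE_FORMAT/TP_FMT form mirrors the existing headers converted below.

    /* include/trace/mytrace.h -- hypothetical example header */
    #if !defined(_TRACE_MYTRACE_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_MYTRACE_H

    #include <linux/tracepoint.h>

    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM mytrace

    /* Tracepoint taking a single integer argument: */
    TRACE_FORMAT(my_event,
            TP_PROTO(int value),
            TP_ARGS(value),
            TP_FMT("value=%d", value)
            );

    #endif /* _TRACE_MYTRACE_H */

    /* This part must be outside protection */
    #include <trace/define_trace.h>

Exactly one C file then defines CREATE_TRACE_POINTS before including the header, which makes define_trace.h emit the tracepoint definitions; every other user includes the header normally:

    #define CREATE_TRACE_POINTS
    #include <trace/mytrace.h>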
Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/trace/define_trace.h | 75 ++++++++++++++++++++++++++++++++++++++++++++ include/trace/irq.h | 5 ++- include/trace/kmem.h | 4 ++- include/trace/lockdep.h | 3 ++ include/trace/sched.h | 3 ++ include/trace/skb.h | 3 ++ kernel/exit.c | 4 --- kernel/fork.c | 2 -- kernel/irq/handle.c | 7 ++--- kernel/kthread.c | 3 -- kernel/lockdep.c | 12 ++----- kernel/sched.c | 10 ++---- kernel/signal.c | 2 -- kernel/softirq.c | 3 -- mm/util.c | 11 ++----- net/core/net-traces.c | 4 +-- 16 files changed, 105 insertions(+), 46 deletions(-) create mode 100644 include/trace/define_trace.h (limited to 'kernel/fork.c') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 00000000000..de9dc7d8508 --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef TRACE_FORMAT +#define TRACE_FORMAT(name, proto, args, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_FILE +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/irq.h b/include/trace/irq.h index 04ab4c65222..75e3468e449 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit, TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ); -#endif +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h index d7d12189e5c..c22c42f980b 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx 
ptr=%p", __entry->call_site, __entry->ptr) ); +#endif /* _TRACE_KMEM_H */ -#endif +/* This part must be outside protection */ +#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 8ee7900b38c..4d301e758de 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired, #endif #endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/sched.h b/include/trace/sched.h index 5b1cf4a2846..ffa1cab586b 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send, ); #endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/skb.h b/include/trace/skb.h index e6fd281f7f8..1e8fabb57c0 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb, ); #endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3b95c..2fe9d2c7eee 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,10 +56,6 @@ #include #include "cred-internals.h" -DEFINE_TRACE(sched_process_free); -DEFINE_TRACE(sched_process_exit); -DEFINE_TRACE(sched_process_wait); - static void exit_mm(struct task_struct * tsk); static void __unhash_process(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd0072..4bebf263923 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -83,8 +83,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -DEFINE_TRACE(sched_process_fork); - int nr_processes(void) { int cpu; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142be8dd..983d8be8dff 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,9 +17,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + #include "internals.h" /* @@ -348,9 +350,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) "but no thread function available.", irq, action->name); } -DEFINE_TRACE(irq_handler_entry); -DEFINE_TRACE(irq_handler_exit); - /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ebaf8519ab..e1c76924545 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; -DEFINE_TRACE(sched_kthread_stop); -DEFINE_TRACE(sched_kthread_stop_ret); - struct kthread_create_info { /* Information passed to kthread() from kthreadd. 
*/ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c4582a6ea95..257f21a76c5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -42,12 +42,14 @@ #include #include #include -#include #include #include "lockdep_internals.h" +#define CREATE_TRACE_POINTS +#include + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -2929,8 +2931,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name, } EXPORT_SYMBOL_GPL(lock_set_class); -DEFINE_TRACE(lock_acquire); - /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: @@ -2957,8 +2957,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, } EXPORT_SYMBOL_GPL(lock_acquire); -DEFINE_TRACE(lock_release); - void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { @@ -3061,8 +3059,6 @@ found_it: put_lock_stats(stats); } -DEFINE_TRACE(lock_acquired); - static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { @@ -3118,8 +3114,6 @@ found_it: lock->ip = ip; } -DEFINE_TRACE(lock_contended); - void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; diff --git a/kernel/sched.c b/kernel/sched.c index 5724508c3b6..e6d4518d47e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,13 +72,15 @@ #include #include #include -#include #include #include #include "sched_cpupri.h" +#define CREATE_TRACE_POINTS +#include + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -118,12 +120,6 @@ */ #define RUNTIME_INF ((u64)~0ULL) -DEFINE_TRACE(sched_wait_task); -DEFINE_TRACE(sched_wakeup); -DEFINE_TRACE(sched_wakeup_new); -DEFINE_TRACE(sched_switch); -DEFINE_TRACE(sched_migrate_task); - #ifdef CONFIG_SMP static void double_rq_lock(struct rq *rq1, struct rq *rq2); diff --git a/kernel/signal.c b/kernel/signal.c index d8034737db4..1d5703ff003 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,8 +41,6 @@ static struct kmem_cache *sigqueue_cachep; -DEFINE_TRACE(sched_signal_send); - static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/kernel/softirq.c b/kernel/softirq.c index 2fecefacdc5..a2d9b458ac2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -186,9 +186,6 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 -DEFINE_TRACE(softirq_entry); -DEFINE_TRACE(softirq_exit); - asmlinkage void __do_softirq(void) { struct softirq_action *h; diff --git a/mm/util.c b/mm/util.c index 2599e83eea1..0e74a22791c 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,9 +4,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -239,13 +241,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, EXPORT_SYMBOL_GPL(get_user_pages_fast); /* Tracepoints definitions. 
*/ -DEFINE_TRACE(kmalloc); -DEFINE_TRACE(kmem_cache_alloc); -DEFINE_TRACE(kmalloc_node); -DEFINE_TRACE(kmem_cache_alloc_node); -DEFINE_TRACE(kfree); -DEFINE_TRACE(kmem_cache_free); - EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb45665e4..80177205947 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -19,11 +19,11 @@ #include #include #include -#include #include #include +#define CREATE_TRACE_POINTS +#include -DEFINE_TRACE(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); -- cgit v1.2.3-70-g09d2 From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 19:39:12 -0400 Subject: tracing/events: move trace point headers into include/trace/events Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt --- include/linux/kmemtrace.h | 2 +- include/trace/define_trace.h | 2 +- include/trace/events/irq.h | 57 +++++++ include/trace/events/kmem.h | 194 ++++++++++++++++++++++ include/trace/events/lockdep.h | 60 +++++++ include/trace/events/sched.h | 339 ++++++++++++++++++++++++++++++++++++++ include/trace/events/skb.h | 40 +++++ include/trace/irq.h | 57 ------- include/trace/kmem.h | 194 ---------------------- include/trace/lockdep.h | 60 ------- include/trace/sched.h | 339 -------------------------------------- include/trace/skb.h | 40 ----- kernel/exit.c | 2 +- kernel/fork.c | 3 +- kernel/irq/handle.c | 2 +- kernel/kthread.c | 2 +- kernel/lockdep.c | 2 +- kernel/sched.c | 2 +- kernel/signal.c | 2 +- kernel/softirq.c | 2 +- kernel/trace/ftrace.c | 2 +- kernel/trace/trace_sched_switch.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- mm/util.c | 2 +- net/core/drop_monitor.c | 2 +- net/core/net-traces.c | 2 +- net/core/skbuff.c | 2 +- 27 files changed, 708 insertions(+), 707 deletions(-) create mode 100644 include/trace/events/irq.h create mode 100644 include/trace/events/kmem.h create mode 100644 include/trace/events/lockdep.h create mode 100644 include/trace/events/sched.h create mode 100644 include/trace/events/skb.h delete mode 100644 include/trace/irq.h delete mode 100644 include/trace/kmem.h delete mode 100644 include/trace/lockdep.h delete mode 100644 include/trace/sched.h delete mode 100644 include/trace/skb.h (limited to 'kernel/fork.c') diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 15c45a27a92..b616d3930c3 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -9,7 +9,7 @@ #ifdef __KERNEL__ -#include +#include #ifdef CONFIG_KMEMTRACE extern void kmemtrace_init(void); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 980eb66a6e3..18869417109 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -43,7 +43,7 @@ #endif #ifndef TRACE_INCLUDE_PATH -# define __TRACE_INCLUDE(system) +# define __TRACE_INCLUDE(system) # define UNDEF_TRACE_INCLUDE_FILE #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 00000000000..75e3468e449 --- /dev/null +++ b/include/trace/events/irq.h @@ 
-0,0 +1,57 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 00000000000..c22c42f980b --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,194 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( 
size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 00000000000..45e326b5c7f --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,60 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? 
"read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h new file mode 100644 index 00000000000..ffa1cab586b --- /dev/null +++ b/include/trace/events/sched.h @@ -0,0 +1,339 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", 
+ __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 00000000000..1e8fabb57c0 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index 75e3468e449..00000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,57 +0,0 @@ -#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_IRQ_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? 
"handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#endif /* _TRACE_IRQ_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h deleted file mode 100644 index c22c42f980b..00000000000 --- a/include/trace/kmem.h +++ /dev/null @@ -1,194 +0,0 @@ -#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KMEM_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, 
call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); -#endif /* _TRACE_KMEM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 45e326b5c7f..00000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,60 +0,0 @@ -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? 
"read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index ffa1cab586b..00000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,339 +0,0 @@ -#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SCHED_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. 
) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", 
- __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#endif /* _TRACE_SCHED_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index 1e8fabb57c0..00000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,40 +0,0 @@ -#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SKB_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#endif /* _TRACE_SKB_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/exit.c b/kernel/exit.c index 2fe9d2c7eee..cab535c427b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 4bebf263923..085f73ebcea 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -71,6 +70,8 @@ #include #include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 983d8be8dff..37c63633e78 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -20,7 +20,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include #include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index e1c76924545..41c88fe4050 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #define KTHREAD_NICE_LEVEL (-5) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 257f21a76c5..47b201ecc6d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -48,7 +48,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/sched.c b/kernel/sched.c index 
e6d4518d47e..9f7ffd00b6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,7 +79,7 @@ #include "sched_cpupri.h" #define CREATE_TRACE_POINTS -#include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] diff --git a/kernel/signal.c b/kernel/signal.c index 1d5703ff003..94ec0a4dde0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/softirq.c b/kernel/softirq.c index a2d9b458ac2..7ab9dfd8d08 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e6a0b5c994..a2348898858 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9d8cccdfaa0..a98106dd979 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5bc00e8f153..b8b13c5540f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/mm/util.c b/mm/util.c index 0e74a22791c..6794a336e9a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,7 +7,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include /** * kstrdup - allocate space for and copy an existing string diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9fd0dc3cca9..b75b6cea49d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 80177205947..499a67eaf3a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -24,6 +24,6 @@ #include #define CREATE_TRACE_POINTS -#include +#include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f7..12806b84445 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include "kmap_skb.h" -- cgit v1.2.3-70-g09d2 From a63eaf34ae60bdb067a354cc8def2e8f4a01f5f4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 22 May 2009 14:17:31 +1000 Subject: perf_counter: Dynamically allocate tasks' perf_counter_context struct This replaces the struct perf_counter_context in the task_struct with a pointer to a dynamically allocated perf_counter_context struct. The main reason for doing this is to allow us to transfer a perf_counter_context from one task to another when we do lazy PMU switching in a later patch. This has a few side-benefits: the task_struct becomes a little smaller, we save some memory because only tasks that have perf_counters attached get a perf_counter_context allocated for them, and we can remove the inclusion of <linux/perf_counter.h> in sched.h, meaning that we don't end up recompiling nearly everything whenever perf_counter.h changes. The perf_counter_context structures are reference-counted and freed when the last reference is dropped. A context can have references from its task and the counters on its task. Counters can outlive the task so it is possible that a context will be freed well after its task has exited.
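The reference rule just described can be sketched as follows (illustrative only: attach_counter() and free_counter() are simplified stand-ins, while get_ctx()/put_ctx() are the helpers this patch adds, shown at the end of this log):

    static void attach_counter(struct perf_counter *counter,
                               struct perf_counter_context *ctx)
    {
            counter->ctx = ctx;
            get_ctx(ctx);           /* the counter now pins the context */
    }

    static void free_counter(struct perf_counter *counter)
    {
            /*
             * May drop the last reference: if the task has already
             * exited and released its reference, this is where the
             * context finally gets kfree()d.
             */
            put_ctx(counter->ctx);
            kfree(counter);
    }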
Contexts are allocated on fork if the parent had a context, or otherwise the first time that a per-task counter is created on a task. In the latter case, we set the context pointer in the task struct locklessly using an atomic compare-and-exchange operation in case we raced with some other task in creating a context for the subject task. This also removes the task pointer from the perf_counter struct. The task pointer was not used anywhere and would make it harder to move a context from one task to another. Anything that needed to know which task a counter was attached to was already using counter->ctx->task. The __perf_counter_init_context function moves up in perf_counter.c so that it can be called from find_get_context, and now initializes the refcount, but is otherwise unchanged. We were potentially calling list_del_counter twice: once from __perf_counter_exit_task when the task exits and once from __perf_counter_remove_from_context when the counter's fd gets closed. This adds a check in list_del_counter so it doesn't do anything if the counter has already been removed from the lists. Since perf_counter_task_sched_in doesn't do anything if the task doesn't have a context, and leaves cpuctx->task_ctx = NULL, this adds code to __perf_install_in_context to set cpuctx->task_ctx if necessary, i.e. in the case where the current task adds the first counter to itself and thus creates a context for itself. This also adds similar code to __perf_counter_enable to handle a similar situation which can arise when the counters have been disabled using prctl; that also leaves cpuctx->task_ctx = NULL. [ Impact: refactor counter context management to prepare for new feature ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: <18966.10075.781053.231153@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 + include/linux/init_task.h | 13 --- include/linux/perf_counter.h | 4 +- include/linux/sched.h | 6 +- kernel/exit.c | 3 +- kernel/fork.c | 1 + kernel/perf_counter.c | 218 +++++++++++++++++++++++++++---------------- 7 files changed, 145 insertions(+), 101 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e9021a90802..b4f64402a82 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -14,6 +14,7 @@ * Mikael Pettersson : PM converted to driver model. */ +#include #include #include #include diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 503afaa0afa..d87247d2641 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,18 +108,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ - .perf_counter_ctx.event_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ - .perf_counter_ctx.lock = \ - __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), -#else -# define INIT_PERF_COUNTERS(tsk) -#endif - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -183,7 +171,6 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f612941ef46..07130900546 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -449,7 +449,6 @@ struct perf_counter { struct hw_perf_counter hw; struct perf_counter_context *ctx; - struct task_struct *task; struct file *filp; struct perf_counter *parent; @@ -498,7 +497,6 @@ struct perf_counter { * Used as a container for task counters and CPU counters as well: */ struct perf_counter_context { -#ifdef CONFIG_PERF_COUNTERS /* * Protect the states of the counters in the list, * nr_active, and the list: @@ -516,6 +514,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + atomic_t refcount; struct task_struct *task; /* @@ -523,7 +522,6 @@ struct perf_counter_context { */ u64 time; u64 timestamp; -#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index ff59d123151..9714d450f41 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,7 +71,6 @@ struct sched_param { #include #include #include -#include #include #include #include @@ -99,6 +98,7 @@ struct robust_list_head; struct bio; struct bts_tracer; struct fs_struct; +struct perf_counter_context; /* * List of flags we want to share for kernel threads, @@ -1387,7 +1387,9 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif - struct perf_counter_context perf_counter_ctx; +#ifdef CONFIG_PERF_COUNTERS + struct perf_counter_context *perf_counter_ctxp; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; diff --git a/kernel/exit.c b/kernel/exit.c index f9dfedd94af..99ad4063ee4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -159,7 +160,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); #ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); + WARN_ON_ONCE(tsk->perf_counter_ctxp); #endif trace_sched_process_free(tsk); put_task_struct(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index d32fef4d38e..e72a09f5355 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 08584c16049..06ea3eae886 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -97,6 +97,17 @@ void perf_enable(void) hw_perf_enable(); } +static void get_ctx(struct perf_counter_context *ctx) +{ + atomic_inc(&ctx->refcount); +} + +static void put_ctx(struct perf_counter_context *ctx) +{ + if (atomic_dec_and_test(&ctx->refcount)) + kfree(ctx); +} + static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -118,11 +129,17 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) ctx->nr_counters++; } +/* + * Remove a counter from the lists for its context. + * Must be called with counter->mutex and ctx->mutex held. 
+ */ static void list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { struct perf_counter *sibling, *tmp; + if (list_empty(&counter->list_entry)) + return; ctx->nr_counters--; list_del_init(&counter->list_entry); @@ -216,8 +233,6 @@ static void __perf_counter_remove_from_context(void *info) counter_sched_out(counter, cpuctx, ctx); - counter->task = NULL; - list_del_counter(counter, ctx); if (!ctx->task) { @@ -279,7 +294,6 @@ retry: */ if (!list_empty(&counter->list_entry)) { list_del_counter(counter, ctx); - counter->task = NULL; } spin_unlock_irq(&ctx->lock); } @@ -568,11 +582,17 @@ static void __perf_install_in_context(void *info) * If this is a task context, we need to check whether it is * the current task context of this cpu. If not it has been * scheduled out before the smp call arrived. + * Or possibly this is the right context but it isn't + * on this cpu because it had no counters. */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; + if (ctx->task && cpuctx->task_ctx != ctx) { + if (cpuctx->task_ctx || ctx->task != current) + return; + cpuctx->task_ctx = ctx; + } spin_lock_irqsave(&ctx->lock, flags); + ctx->is_active = 1; update_context_time(ctx); /* @@ -653,7 +673,6 @@ perf_install_in_context(struct perf_counter_context *ctx, return; } - counter->task = task; retry: task_oncpu_function_call(task, __perf_install_in_context, counter); @@ -693,10 +712,14 @@ static void __perf_counter_enable(void *info) * If this is a per-task counter, need to check whether this * counter's task is the current task on this cpu. */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; + if (ctx->task && cpuctx->task_ctx != ctx) { + if (cpuctx->task_ctx || ctx->task != current) + return; + cpuctx->task_ctx = ctx; + } spin_lock_irqsave(&ctx->lock, flags); + ctx->is_active = 1; update_context_time(ctx); counter->prev_state = counter->state; @@ -852,10 +875,10 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter_context *ctx = task->perf_counter_ctxp; struct pt_regs *regs; - if (likely(!cpuctx->task_ctx)) + if (likely(!ctx || !cpuctx->task_ctx)) return; update_context_time(ctx); @@ -871,6 +894,8 @@ static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (!cpuctx->task_ctx) + return; __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; } @@ -969,8 +994,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, void perf_counter_task_sched_in(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter_context *ctx = task->perf_counter_ctxp; + if (likely(!ctx)) + return; __perf_counter_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } @@ -985,11 +1012,11 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) int perf_counter_task_disable(void) { struct task_struct *curr = current; - struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; unsigned long flags; - if (likely(!ctx->nr_counters)) + if (!ctx || !ctx->nr_counters) return 0; local_irq_save(flags); @@ -1020,12 +1047,12 @@ int 
perf_counter_task_disable(void) int perf_counter_task_enable(void) { struct task_struct *curr = current; - struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter_context *ctx = curr->perf_counter_ctxp; struct perf_counter *counter; unsigned long flags; int cpu; - if (likely(!ctx->nr_counters)) + if (!ctx || !ctx->nr_counters) return 0; local_irq_save(flags); @@ -1128,19 +1155,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) return; cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &curr->perf_counter_ctx; + ctx = curr->perf_counter_ctxp; perf_adjust_freq(&cpuctx->ctx); - perf_adjust_freq(ctx); + if (ctx) + perf_adjust_freq(ctx); perf_counter_cpu_sched_out(cpuctx); - __perf_counter_task_sched_out(ctx); + if (ctx) + __perf_counter_task_sched_out(ctx); rotate_ctx(&cpuctx->ctx); - rotate_ctx(ctx); + if (ctx) + rotate_ctx(ctx); perf_counter_cpu_sched_in(cpuctx, cpu); - perf_counter_task_sched_in(curr, cpu); + if (ctx) + perf_counter_task_sched_in(curr, cpu); } /* @@ -1176,6 +1207,22 @@ static u64 perf_counter_read(struct perf_counter *counter) return atomic64_read(&counter->count); } +/* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->lock); + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->counter_list); + INIT_LIST_HEAD(&ctx->event_list); + atomic_set(&ctx->refcount, 1); + ctx->task = task; +} + static void put_context(struct perf_counter_context *ctx) { if (ctx->task) @@ -1186,6 +1233,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; + struct perf_counter_context *tctx; struct task_struct *task; /* @@ -1225,15 +1273,36 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) if (!task) return ERR_PTR(-ESRCH); - ctx = &task->perf_counter_ctx; - ctx->task = task; - /* Reuse ptrace permission checks for now. */ if (!ptrace_may_access(task, PTRACE_MODE_READ)) { - put_context(ctx); + put_task_struct(task); return ERR_PTR(-EACCES); } + ctx = task->perf_counter_ctxp; + if (!ctx) { + ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + if (!ctx) { + put_task_struct(task); + return ERR_PTR(-ENOMEM); + } + __perf_counter_init_context(ctx, task); + /* + * Make sure other cpus see correct values for *ctx + * once task->perf_counter_ctxp is visible to them. + */ + smp_wmb(); + tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx); + if (tctx) { + /* + * We raced with some other task; use + * the context they set. 
+ */ + kfree(ctx); + ctx = tctx; + } + } + return ctx; } @@ -1242,6 +1311,7 @@ static void free_counter_rcu(struct rcu_head *head) struct perf_counter *counter; counter = container_of(head, struct perf_counter, rcu_head); + put_ctx(counter->ctx); kfree(counter); } @@ -2247,7 +2317,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) perf_counter_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); + perf_counter_comm_ctx(current->perf_counter_ctxp, comm_event); } void perf_counter_comm(struct task_struct *task) @@ -2256,7 +2326,9 @@ void perf_counter_comm(struct task_struct *task) if (!atomic_read(&nr_comm_tracking)) return; - + if (!current->perf_counter_ctxp) + return; + comm_event = (struct perf_comm_event){ .task = task, .event = { @@ -2372,7 +2444,7 @@ got_name: perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); + perf_counter_mmap_ctx(current->perf_counter_ctxp, mmap_event); kfree(buf); } @@ -2384,6 +2456,8 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, if (!atomic_read(&nr_mmap_tracking)) return; + if (!current->perf_counter_ctxp) + return; mmap_event = (struct perf_mmap_event){ .file = file, @@ -2985,6 +3059,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->pmu = NULL; counter->ctx = ctx; + get_ctx(ctx); counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) @@ -3149,21 +3224,6 @@ err_put_context: goto out_fput; } -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->counter_list); - INIT_LIST_HEAD(&ctx->event_list); - ctx->task = task; -} - /* * inherit a counter from parent task to child task: */ @@ -3195,7 +3255,6 @@ inherit_counter(struct perf_counter *parent_counter, /* * Link it up in the child's context: */ - child_counter->task = child; add_counter_to_ctx(child_counter, child_ctx); child_counter->parent = parent_counter; @@ -3294,40 +3353,15 @@ __perf_counter_exit_task(struct task_struct *child, struct perf_counter *parent_counter; /* - * If we do not self-reap then we have to wait for the - * child task to unschedule (it will happen for sure), - * so that its counter is at its final count. (This - * condition triggers rarely - child tasks usually get - * off their CPU before the parent has a chance to - * get this far into the reaping action) + * Protect against concurrent operations on child_counter + * due its fd getting closed, etc. */ - if (child != current) { - wait_task_inactive(child, 0); - update_counter_times(child_counter); - list_del_counter(child_counter, child_ctx); - } else { - struct perf_cpu_context *cpuctx; - unsigned long flags; - - /* - * Disable and unlink this counter. 
- * - * Be careful about zapping the list - IRQ/NMI context - * could still be processing it: - */ - local_irq_save(flags); - perf_disable(); - - cpuctx = &__get_cpu_var(perf_cpu_context); + mutex_lock(&child_counter->mutex); - group_sched_out(child_counter, cpuctx, child_ctx); - update_counter_times(child_counter); + update_counter_times(child_counter); + list_del_counter(child_counter, child_ctx); - list_del_counter(child_counter, child_ctx); - - perf_enable(); - local_irq_restore(flags); - } + mutex_unlock(&child_counter->mutex); parent_counter = child_counter->parent; /* @@ -3346,19 +3380,29 @@ __perf_counter_exit_task(struct task_struct *child, * * Note: we may be running in child context, but the PID is not hashed * anymore so new counters will not be added. + * (XXX not sure that is true when we get called from flush_old_exec. + * -- paulus) */ void perf_counter_exit_task(struct task_struct *child) { struct perf_counter *child_counter, *tmp; struct perf_counter_context *child_ctx; + unsigned long flags; WARN_ON_ONCE(child != current); - child_ctx = &child->perf_counter_ctx; + child_ctx = child->perf_counter_ctxp; - if (likely(!child_ctx->nr_counters)) + if (likely(!child_ctx)) return; + local_irq_save(flags); + __perf_counter_task_sched_out(child_ctx); + child->perf_counter_ctxp = NULL; + local_irq_restore(flags); + + mutex_lock(&child_ctx->mutex); + again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) @@ -3371,6 +3415,10 @@ again: */ if (!list_empty(&child_ctx->counter_list)) goto again; + + mutex_unlock(&child_ctx->mutex); + + put_ctx(child_ctx); } /* @@ -3382,19 +3430,25 @@ void perf_counter_init_task(struct task_struct *child) struct perf_counter *counter; struct task_struct *parent = current; - child_ctx = &child->perf_counter_ctx; - parent_ctx = &parent->perf_counter_ctx; - - __perf_counter_init_context(child_ctx, child); + child->perf_counter_ctxp = NULL; /* * This is executed from the parent task context, so inherit - * counters that have been marked for cloning: + * counters that have been marked for cloning. + * First allocate and initialize a context for the child. */ - if (likely(!parent_ctx->nr_counters)) + child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); + if (!child_ctx) + return; + + parent_ctx = parent->perf_counter_ctxp; + if (likely(!parent_ctx || !parent_ctx->nr_counters)) return; + __perf_counter_init_context(child_ctx, child); + child->perf_counter_ctxp = child_ctx; + /* * Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. -- cgit v1.2.3-70-g09d2 From e4cbb4e3ac8b09fdb11e39e5a5611bfab0a7cd1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 May 2009 15:50:30 +0200 Subject: perf_counter: Move child perfcounter init to after scheduler init Initialize a task's perfcounters (inherit from parent, etc.) after the child task's scheduler fields have been initialized already. 
[ Impact: cleanup ] Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index e72a09f5355..675e01e9072 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -984,7 +984,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; rt_mutex_init_task(p); - perf_counter_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); @@ -1096,6 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); + perf_counter_init_task(p); if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; -- cgit v1.2.3-70-g09d2 From 6ab423e0eaca827fbd201ca4ae7d4f8573a366b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2009 14:45:27 +0200 Subject: perf_counter: Propagate inheritance failures down the fork() path Fail fork() when we fail inheritance for some reason (-ENOMEM most likely). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: <20090525124600.324656474@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++-- kernel/fork.c | 6 +++++- kernel/perf_counter.c | 20 ++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b1f2bac09f9..d3e85de9bf1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -566,7 +566,7 @@ extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *child); +extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); @@ -631,7 +631,7 @@ perf_counter_task_sched_out(struct task_struct *task, struct task_struct *next, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *child) { } +static inline int perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } diff --git a/kernel/fork.c b/kernel/fork.c index 675e01e9072..c07c3335cea 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1095,7 +1095,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. 
*/ sched_fork(p, clone_flags); - perf_counter_init_task(p); + + retval = perf_counter_init_task(p); + if (retval) + goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; @@ -1295,6 +1298,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: + perf_counter_exit_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 217dbcce2eb..7a7a144870e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3434,18 +3434,23 @@ again: /* * Initialize the perf_counter context in task_struct */ -void perf_counter_init_task(struct task_struct *child) +int perf_counter_init_task(struct task_struct *child) { struct perf_counter_context *child_ctx, *parent_ctx; struct perf_counter *counter; struct task_struct *parent = current; int inherited_all = 1; + int ret = 0; child->perf_counter_ctxp = NULL; mutex_init(&child->perf_counter_mutex); INIT_LIST_HEAD(&child->perf_counter_list); + parent_ctx = parent->perf_counter_ctxp; + if (likely(!parent_ctx || !parent_ctx->nr_counters)) + return 0; + /* * This is executed from the parent task context, so inherit * counters that have been marked for cloning. @@ -3454,11 +3459,7 @@ void perf_counter_init_task(struct task_struct *child) child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); if (!child_ctx) - return; - - parent_ctx = parent->perf_counter_ctxp; - if (likely(!parent_ctx || !parent_ctx->nr_counters)) - return; + return -ENOMEM; __perf_counter_init_context(child_ctx, child); child->perf_counter_ctxp = child_ctx; @@ -3482,8 +3483,9 @@ void perf_counter_init_task(struct task_struct *child) continue; } - if (inherit_group(counter, parent, - parent_ctx, child, child_ctx)) { + ret = inherit_group(counter, parent, parent_ctx, + child, child_ctx); + if (ret) { inherited_all = 0; break; } @@ -3505,6 +3507,8 @@ void perf_counter_init_task(struct task_struct *child) } mutex_unlock(&parent_ctx->mutex); + + return ret; } static void __cpuinit perf_counter_init_cpu(int cpu) -- cgit v1.2.3-70-g09d2 From bbbee90829304d156c12b171c0ac7e6e1aba8b90 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 May 2009 14:25:58 +0200 Subject: perf_counter: Amend cleanup in fork() fail When fork() fails we cannot use perf_counter_exit_task() since that assumes it operates on current. Write a new helper that cleans up unused/clean contexts.
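In sketch form, the resulting split between the two cleanup paths looks like this (a summary of helpers shown in full elsewhere in this log, with illustrative comments; not a new API):

/*
 * Exit path: runs on the exiting task itself (child == current), so it
 * must first unschedule the context from this CPU before tearing the
 * counters down.
 */
void perf_counter_exit_task(struct task_struct *child);

/*
 * Failed fork: the half-built child never ran, so its context was never
 * scheduled in; cleanup is a plain walk of ctx->counter_list that drops
 * each inherited counter's reference on its parent, then put_ctx().
 */
void perf_counter_free_task(struct task_struct *task);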
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/fork.c | 2 +- kernel/perf_counter.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 4 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 717bf3b59ba..519a41bba24 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -579,6 +579,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); +extern void perf_counter_free_task(struct task_struct *task); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); @@ -644,6 +645,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline int perf_counter_init_task(struct task_struct *child) { return 0; } static inline void perf_counter_exit_task(struct task_struct *child) { } +static inline void perf_counter_free_task(struct task_struct *task) { } static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_disable(void) { } diff --git a/kernel/fork.c b/kernel/fork.c index c07c3335cea..23bf757ed32 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1298,7 +1298,7 @@ bad_fork_cleanup_semundo: bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_exit_task(p); + perf_counter_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0c000d305e0..79c3f26541d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3538,8 +3538,7 @@ static void sync_child_counter(struct perf_counter *child_counter, } static void -__perf_counter_exit_task(struct task_struct *child, - struct perf_counter *child_counter, +__perf_counter_exit_task(struct perf_counter *child_counter, struct perf_counter_context *child_ctx) { struct perf_counter *parent_counter; @@ -3605,7 +3604,7 @@ void perf_counter_exit_task(struct task_struct *child) again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) - __perf_counter_exit_task(child, child_counter, child_ctx); + __perf_counter_exit_task(child_counter, child_ctx); /* * If the last counter was a group counter, it will have appended all @@ -3620,6 +3619,44 @@ again: put_ctx(child_ctx); } +/* + * free an unexposed, unused context as created by inheritance by + * init_task below, used by fork() in case of fail. 
+ */ +void perf_counter_free_task(struct task_struct *task) +{ + struct perf_counter_context *ctx = task->perf_counter_ctxp; + struct perf_counter *counter, *tmp; + + if (!ctx) + return; + + mutex_lock(&ctx->mutex); +again: + list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) { + struct perf_counter *parent = counter->parent; + + if (WARN_ON_ONCE(!parent)) + continue; + + mutex_lock(&parent->child_mutex); + list_del_init(&counter->child_list); + mutex_unlock(&parent->child_mutex); + + fput(parent->filp); + + list_del_counter(counter, ctx); + free_counter(counter); + } + + if (!list_empty(&ctx->counter_list)) + goto again; + + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + /* * Initialize the perf_counter context in task_struct */ -- cgit v1.2.3-70-g09d2 From f7e8b616ed1cc6f790b82324bce8a2a60295e5c2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Jun 2009 16:39:48 -0400 Subject: function-graph: move initialization of new tasks up in fork When the function graph tracer is enabled, all new tasks must allocate a ret_stack to place the return address of functions. This is because the function graph tracer will replace the real return address with a call to the tracing of the exit function. This initialization happens in fork, but it happens too late. If fork fails, it will call free_task, which frees this ret_stack. But before initialization happens, the new (failed) task points to its parent's ret_stack. If a fork failure happens during the function trace, it would be catastrophic for the parent. Also, there's no need to call ftrace_graph_exit_task from fork, since it is called by free_task which fork calls on failure. [ Impact: prevent crash during failed fork running function graph tracer ] Signed-off-by: Steven Rostedt --- kernel/fork.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd0072..c4b1e35c430 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -982,6 +982,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!p) goto fork_out; + ftrace_graph_init_task(p); + rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING @@ -1131,8 +1133,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, } } - ftrace_graph_init_task(p); - p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) @@ -1141,7 +1141,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) - goto bad_fork_free_graph; + goto bad_fork_free_pid; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; @@ -1233,7 +1233,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_graph; + goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { @@ -1268,8 +1268,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, cgroup_post_fork(p); return p; -bad_fork_free_graph: - ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); -- cgit v1.2.3-70-g09d2 From 226f62fdd53d5b2c74e242aa11f6ad43d0285d3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2009 11:23:56 +0200 Subject: perf_counter: Add a comm hook for pure fork()s I noticed missing COMM events and found that we missed reporting them for pure forks.
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 23bf757ed32..b7d7a9f0bd7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1412,6 +1412,12 @@ long do_fork(unsigned long clone_flags, if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); + } else { + /* + * vfork will do an exec which will call + * set_task_comm() + */ + perf_counter_comm(p); } audit_finish_fork(p); -- cgit v1.2.3-70-g09d2 From 60313ebed739b331e8e61079da27a11ee3b73a30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2009 16:53:44 +0200 Subject: perf_counter: Add fork event Create a fork event so that we can easily clone the comm and dso maps without having to generate all those events. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 ++++ kernel/fork.c | 4 +- kernel/perf_counter.c | 131 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 126 insertions(+), 19 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 37d5541d74c..380247bdb91 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -276,6 +276,14 @@ enum perf_event_type { PERF_EVENT_THROTTLE = 5, PERF_EVENT_UNTHROTTLE = 6, + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * }; + */ + PERF_EVENT_FORK = 7, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* @@ -618,6 +626,7 @@ extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); @@ -673,6 +682,7 @@ perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } static inline void perf_counter_task_migration(struct task_struct *task, int cpu) { } diff --git a/kernel/fork.c b/kernel/fork.c index b7d7a9f0bd7..f4466ca37ec 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1412,12 +1412,12 @@ long do_fork(unsigned long clone_flags, if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); - } else { + } else if (!(clone_flags & CLONE_VM)) { /* * vfork will do an exec which will call * set_task_comm() */ - perf_counter_comm(p); + perf_counter_fork(p); } audit_finish_fork(p); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0bb03f15a5b..78c58623a0d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -40,9 +40,9 @@ static int perf_reserved_percpu __read_mostly; static int perf_overcommit __read_mostly = 1; static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_tracking __read_mostly; -static atomic_t nr_munmap_tracking __read_mostly; -static atomic_t nr_comm_tracking __read_mostly; +static atomic_t nr_mmap_counters __read_mostly; +static atomic_t nr_munmap_counters 
__read_mostly; +static atomic_t nr_comm_counters __read_mostly; int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ @@ -1447,11 +1447,11 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_counters); if (counter->attr.mmap) - atomic_dec(&nr_mmap_tracking); + atomic_dec(&nr_mmap_counters); if (counter->attr.munmap) - atomic_dec(&nr_munmap_tracking); + atomic_dec(&nr_munmap_counters); if (counter->attr.comm) - atomic_dec(&nr_comm_tracking); + atomic_dec(&nr_comm_counters); if (counter->destroy) counter->destroy(counter); @@ -2475,6 +2475,105 @@ static void perf_counter_output(struct perf_counter *counter, perf_output_end(&handle); } +/* + * fork tracking + */ + +struct perf_fork_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 ppid; + } event; +}; + +static void perf_counter_fork_output(struct perf_counter *counter, + struct perf_fork_event *fork_event) +{ + struct perf_output_handle handle; + int size = fork_event->event.header.size; + struct task_struct *task = fork_event->task; + int ret = perf_output_begin(&handle, counter, size, 0, 0); + + if (ret) + return; + + fork_event->event.pid = perf_counter_pid(counter, task); + fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); + + perf_output_put(&handle, fork_event->event); + perf_output_end(&handle); +} + +static int perf_counter_fork_match(struct perf_counter *counter) +{ + if (counter->attr.comm || counter->attr.mmap || counter->attr.munmap) + return 1; + + return 0; +} + +static void perf_counter_fork_ctx(struct perf_counter_context *ctx, + struct perf_fork_event *fork_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_fork_match(counter)) + perf_counter_fork_output(counter, fork_event); + } + rcu_read_unlock(); +} + +static void perf_counter_fork_event(struct perf_fork_event *fork_event) +{ + struct perf_cpu_context *cpuctx; + struct perf_counter_context *ctx; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_fork_ctx(&cpuctx->ctx, fork_event); + put_cpu_var(perf_cpu_context); + + rcu_read_lock(); + /* + * doesn't really matter which of the child contexts the + * events ends up in. 
+ */ + ctx = rcu_dereference(current->perf_counter_ctxp); + if (ctx) + perf_counter_fork_ctx(ctx, fork_event); + rcu_read_unlock(); +} + +void perf_counter_fork(struct task_struct *task) +{ + struct perf_fork_event fork_event; + + if (!atomic_read(&nr_comm_counters) && + !atomic_read(&nr_mmap_counters) && + !atomic_read(&nr_munmap_counters)) + return; + + fork_event = (struct perf_fork_event){ + .task = task, + .event = { + .header = { + .type = PERF_EVENT_FORK, + .size = sizeof(fork_event.event), + }, + }, + }; + + perf_counter_fork_event(&fork_event); +} + /* * comm tracking */ @@ -2511,11 +2610,9 @@ static void perf_counter_comm_output(struct perf_counter *counter, perf_output_end(&handle); } -static int perf_counter_comm_match(struct perf_counter *counter, - struct perf_comm_event *comm_event) +static int perf_counter_comm_match(struct perf_counter *counter) { - if (counter->attr.comm && - comm_event->event.header.type == PERF_EVENT_COMM) + if (counter->attr.comm) return 1; return 0; @@ -2531,7 +2628,7 @@ static void perf_counter_comm_ctx(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_comm_match(counter, comm_event)) + if (perf_counter_comm_match(counter)) perf_counter_comm_output(counter, comm_event); } rcu_read_unlock(); @@ -2570,7 +2667,7 @@ void perf_counter_comm(struct task_struct *task) { struct perf_comm_event comm_event; - if (!atomic_read(&nr_comm_tracking)) + if (!atomic_read(&nr_comm_counters)) return; comm_event = (struct perf_comm_event){ @@ -2708,7 +2805,7 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, { struct perf_mmap_event mmap_event; - if (!atomic_read(&nr_mmap_tracking)) + if (!atomic_read(&nr_mmap_counters)) return; mmap_event = (struct perf_mmap_event){ @@ -2729,7 +2826,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, { struct perf_mmap_event mmap_event; - if (!atomic_read(&nr_munmap_tracking)) + if (!atomic_read(&nr_munmap_counters)) return; mmap_event = (struct perf_mmap_event){ @@ -3427,11 +3524,11 @@ done: atomic_inc(&nr_counters); if (counter->attr.mmap) - atomic_inc(&nr_mmap_tracking); + atomic_inc(&nr_mmap_counters); if (counter->attr.munmap) - atomic_inc(&nr_munmap_tracking); + atomic_inc(&nr_munmap_counters); if (counter->attr.comm) - atomic_inc(&nr_comm_tracking); + atomic_inc(&nr_comm_counters); return counter; } -- cgit v1.2.3-70-g09d2 From 2dff440525f8faba8836e9f05297b76f23b4af30 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 31 May 2008 15:56:17 +0200 Subject: kmemcheck: add mm functions With kmemcheck enabled, the slab allocator needs to do this: 1. Tell kmemcheck to allocate the shadow memory which stores the status of each byte in the allocation proper, e.g. whether it is initialized or uninitialized. 2. Tell kmemcheck which parts of memory should be marked uninitialized. There are actually a few more states, such as "not yet allocated" and "recently freed". If a slab cache is set up using the SLAB_NOTRACK flag, it will never return memory that can take page faults because of kmemcheck. If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still request memory with the __GFP_NOTRACK flag. This does not prevent the page faults from occurring; however, it marks the object in question as being initialized so that no warnings will ever be produced for this object.
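For illustration, the two opt-outs just described would be used roughly as follows (a hedged usage sketch; "example_cache" and struct example are invented names, not part of the patch):

#include <linux/slab.h>
#include <linux/gfp.h>

struct example {	/* hypothetical object type, for illustration */
	int val;
};

static struct kmem_cache *example_cachep;

static void example_usage(void)
{
	struct example *e;

	/* Objects from this cache never take kmemcheck page faults: */
	example_cachep = kmem_cache_create("example_cache",
			sizeof(struct example), 0,
			SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);

	/*
	 * One-off opt-out on an otherwise tracked path: the page faults
	 * still happen, but the object is marked initialized up front,
	 * so kmemcheck never warns about it.
	 */
	e = kmalloc(sizeof(*e), GFP_KERNEL | __GFP_NOTRACK);
	kfree(e);
}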
In addition to (and in contrast to) __GFP_NOTRACK, the __GFP_NOTRACK_FALSE_POSITIVE flag indicates that the allocation should not be tracked _because_ it would produce a false positive. Their values are identical, but need not be so in the future (for example, we could now enable/disable false positives with a config option). Parts of this patch were contributed by Pekka Enberg but merged for atomicity. Signed-off-by: Vegard Nossum Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/kernel/process.c | 2 +- include/linux/gfp.h | 9 +++- include/linux/kmemcheck.h | 47 +++++++++++++++++++++ include/linux/slab.h | 7 ++++ kernel/fork.c | 14 +++---- mm/Makefile | 1 + mm/kmemcheck.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 mm/kmemcheck.c (limited to 'kernel/fork.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3bb2be1649b..994dd6a4a2a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -63,7 +63,7 @@ void arch_task_cache_init(void) task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), - SLAB_PANIC, NULL); + SLAB_PANIC | SLAB_NOTRACK, NULL); } /* diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f5453..daeaa8fe1bb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -51,8 +51,15 @@ struct vm_area_struct; #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ +#define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ -#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ +/* + * This may seem redundant, but it's a way of annotating false positives vs. + * allocations that simply cannot be supported (e.g. page tables). + */ +#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) + +#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 39480c91b2f..5b65f4ebead 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -7,11 +7,58 @@ #ifdef CONFIG_KMEMCHECK extern int kmemcheck_enabled; +/* The slab-related functions. 
*/ +void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order); +void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order); +void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size); +void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); + +void kmemcheck_show_pages(struct page *p, unsigned int n); +void kmemcheck_hide_pages(struct page *p, unsigned int n); + +bool kmemcheck_page_is_tracked(struct page *p); + +void kmemcheck_mark_unallocated(void *address, unsigned int n); +void kmemcheck_mark_uninitialized(void *address, unsigned int n); +void kmemcheck_mark_initialized(void *address, unsigned int n); +void kmemcheck_mark_freed(void *address, unsigned int n); + +void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); +void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); + int kmemcheck_show_addr(unsigned long address); int kmemcheck_hide_addr(unsigned long address); #else #define kmemcheck_enabled 0 +static inline void +kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order) +{ +} + +static inline void +kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +{ +} + +static inline void +kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size) +{ +} + +static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, + size_t size) +{ +} + +static inline bool kmemcheck_page_is_tracked(struct page *p) +{ + return false; +} #endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064ced..e339fcf17cd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -64,6 +64,13 @@ #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ +/* Don't track use of uninitialized memory */ +#ifdef CONFIG_KMEMCHECK +# define SLAB_NOTRACK 0x01000000UL +#else +# define SLAB_NOTRACK 0x00000000UL +#endif + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/kernel/fork.c b/kernel/fork.c index 4430eb1376f..be022c200da 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages) /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct), - ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); + ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif /* do the arch specific task caches init */ @@ -1470,20 +1470,20 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, - sighand_ctor); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_NOTRACK, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + 
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); mmap_init(); } diff --git a/mm/Makefile b/mm/Makefile index e89acb090b4..c379ce08354 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o +obj-$(CONFIG_KMEMCHECK) += kmemcheck.o obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c new file mode 100644 index 00000000000..eaa41b80261 --- /dev/null +++ b/mm/kmemcheck.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order) +{ + struct page *shadow; + int pages; + int i; + + pages = 1 << order; + + /* + * With kmemcheck enabled, we need to allocate a memory area for the + * shadow bits as well. + */ + shadow = alloc_pages_node(node, flags, order); + if (!shadow) { + if (printk_ratelimit()) + printk(KERN_ERR "kmemcheck: failed to allocate " + "shadow bitmap\n"); + return; + } + + for(i = 0; i < pages; ++i) + page[i].shadow = page_address(&shadow[i]); + + /* + * Mark it as non-present for the MMU so that our accesses to + * this memory will trigger a page fault and let us analyze + * the memory accesses. + */ + kmemcheck_hide_pages(page, pages); + + /* + * Objects from caches that have a constructor don't get + * cleared when they're allocated, so we need to do it here. + */ + if (s->ctor) + kmemcheck_mark_uninitialized_pages(page, pages); + else + kmemcheck_mark_unallocated_pages(page, pages); +} + +void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +{ + struct page *shadow; + int pages; + int i; + + pages = 1 << order; + + kmemcheck_show_pages(page, pages); + + shadow = virt_to_page(page[0].shadow); + + for(i = 0; i < pages; ++i) + page[i].shadow = NULL; + + __free_pages(shadow, order); +} + +void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size) +{ + /* + * Has already been memset(), which initializes the shadow for us + * as well. + */ + if (gfpflags & __GFP_ZERO) + return; + + /* No need to initialize the shadow of a non-tracked slab. */ + if (s->flags & SLAB_NOTRACK) + return; + + if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { + /* + * Allow notracked objects to be allocated from + * tracked caches. Note however that these objects + * will still get page faults on access, they just + * won't ever be flagged as uninitialized. If page + * faults are not acceptable, the slab cache itself + * should be marked NOTRACK. + */ + kmemcheck_mark_initialized(object, size); + } else if (!s->ctor) { + /* + * New objects should be marked uninitialized before + * they're returned to the called. + */ + kmemcheck_mark_uninitialized(object, size); + } +} + +void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) +{ + /* TODO: RCU freeing is unsupported for now; hide false positives. 
*/ + if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) + kmemcheck_mark_freed(object, size); +} -- cgit v1.2.3-70-g09d2 From 72a1de39f89325a834a8c70b2a0d8f71d919f640 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 17 Jun 2009 16:27:37 -0700 Subject: copy_process(): remove the unneeded clear_tsk_thread_flag(TIF_SIGPENDING) The forked child can have TIF_SIGPENDING if it was copied from parent's ti->flags. But this is harmless and actually almost never happens, because copy_process() can't succeed if signal_pending() == T. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index be022c200da..467746b3f0a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1029,7 +1029,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); - clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; -- cgit v1.2.3-70-g09d2
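The "can't succeed" claim refers to the signal check late in copy_process(); the hunk context is visible in the function-graph commit above, and the surrounding lines (reconstructed here for illustration, not quoted from this log in full) read:

	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

Any signal pending in the parent therefore aborts the fork before the child is ever exposed, which is why the TIF_SIGPENDING flag copied into the child is harmless and the explicit clear was redundant.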