diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/init_task.h | 11 | ||||
-rw-r--r-- | include/linux/kernel_stat.h | 8 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 305 | ||||
-rw-r--r-- | include/linux/prctl.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 14 | ||||
-rw-r--r-- | include/linux/syscalls.h | 5 |
6 files changed, 345 insertions, 1 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index af1de95e711..219748d0026 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -120,6 +120,16 @@ extern struct group_info init_groups; extern struct cred init_cred; +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_ctx.counter_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.lock = \ + __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -185,6 +195,7 @@ extern struct cred init_cred; INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + INIT_PERF_COUNTERS(tsk) \ } diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a9..b6d2887a5d8 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -81,7 +81,15 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } + +/* + * Lock/unlock the current runqueue - to extract task statistics: + */ +extern void curr_rq_lock_irq_save(unsigned long *flags); +extern void curr_rq_unlock_irq_restore(unsigned long *flags); +extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); + extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 00000000000..dde564517b6 --- /dev/null +++ b/include/linux/perf_counter.h @@ -0,0 +1,305 @@ +/* + * Performance counters: + * + * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de> + * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_COUNTER_H +#define _LINUX_PERF_COUNTER_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, +}; + +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + __s64 type; + + __u64 irq_period; + __u64 record_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + + __reserved_1 : 54; + + __u32 extra_config_len; + __u32 __reserved_4; + + __u64 __reserved_2; + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + +#ifdef __KERNEL__ +/* + * Kernel-internal data types and definitions: + */ + +#ifdef CONFIG_PERF_COUNTERS +# include <asm/perf_counter.h> +#endif + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> + +struct task_struct; + +/** + * struct hw_perf_counter - performance counter hardware details: + */ +struct hw_perf_counter { +#ifdef CONFIG_PERF_COUNTERS + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + atomic64_t prev_count; + u64 irq_period; + atomic64_t period_left; +#endif +}; + +/* + * Hardcoded buffer length limit for now, for IRQ-fed events: + */ +#define PERF_DATA_BUFLEN 2048 + +/** + * struct perf_data - performance counter IRQ data sampling ... + */ +struct perf_data { + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; +}; + +struct perf_counter; + +/** + * struct hw_perf_counter_ops - performance counter hw ops + */ +struct hw_perf_counter_ops { + int (*enable) (struct perf_counter *counter); + void (*disable) (struct perf_counter *counter); + void (*read) (struct perf_counter *counter); +}; + +/** + * enum perf_counter_active_state - the states of a counter + */ +enum perf_counter_active_state { + PERF_COUNTER_STATE_ERROR = -2, + PERF_COUNTER_STATE_OFF = -1, + PERF_COUNTER_STATE_INACTIVE = 0, + PERF_COUNTER_STATE_ACTIVE = 1, +}; + +struct file; + +/** + * struct perf_counter - performance counter kernel representation: + */ +struct perf_counter { +#ifdef CONFIG_PERF_COUNTERS + struct list_head list_entry; + struct list_head sibling_list; + struct perf_counter *group_leader; + const struct hw_perf_counter_ops *hw_ops; + + enum perf_counter_active_state state; + enum perf_counter_active_state prev_state; + atomic64_t count; + + struct perf_counter_hw_event hw_event; + struct hw_perf_counter hw; + + struct perf_counter_context *ctx; + struct task_struct *task; + struct file *filp; + + struct perf_counter *parent; + struct list_head child_list; + + /* + * Protect attach/detach and child_list: + */ + struct mutex mutex; + + int oncpu; + int cpu; + + /* read() / irq related data */ + wait_queue_head_t waitq; + /* optional: for NMIs */ + int wakeup_pending; + struct perf_data *irqdata; + struct perf_data *usrdata; + struct perf_data data[2]; +#endif +}; + +/** + * struct perf_counter_context - counter context structure + * + * Used as a container for task counters and CPU counters as well: + */ +struct perf_counter_context { +#ifdef CONFIG_PERF_COUNTERS + /* + * Protect the states of the counters in the list, + * nr_active, and the list: + */ + spinlock_t lock; + /* + * Protect the list of counters. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; + + struct list_head counter_list; + int nr_counters; + int nr_active; + int is_active; + struct task_struct *task; +#endif +}; + +/** + * struct perf_counter_cpu_context - per cpu counter context structure + */ +struct perf_cpu_context { + struct perf_counter_context ctx; + struct perf_counter_context *task_ctx; + int active_oncpu; + int max_pertask; + int exclusive; +}; + +/* + * Set by architecture code: + */ +extern int perf_max_counters; + +#ifdef CONFIG_PERF_COUNTERS +extern const struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter); + +extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_tick(struct task_struct *task, int cpu); +extern void perf_counter_init_task(struct task_struct *child); +extern void perf_counter_exit_task(struct task_struct *child); +extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_print_debug(void); +extern void perf_counter_unthrottle(void); +extern u64 hw_perf_save_disable(void); +extern void hw_perf_restore(u64 ctrl); +extern int perf_counter_task_disable(void); +extern int perf_counter_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu); + +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + +#else +static inline void +perf_counter_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_tick(struct task_struct *task, int cpu) { } +static inline void perf_counter_init_task(struct task_struct *child) { } +static inline void perf_counter_exit_task(struct task_struct *child) { } +static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_print_debug(void) { } +static inline void perf_counter_unthrottle(void) { } +static inline void hw_perf_restore(u64 ctrl) { } +static inline u64 hw_perf_save_disable(void) { return 0; } +static inline int perf_counter_task_disable(void) { return -EINVAL; } +static inline int perf_counter_task_enable(void) { return -EINVAL; } +#endif + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e..b00df4c79c6 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,4 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index b94f3541f67..75b2fc5306d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,7 @@ struct sched_param { #include <linux/path.h> #include <linux/compiler.h> #include <linux/completion.h> +#include <linux/perf_counter.h> #include <linux/pid.h> #include <linux/percpu.h> #include <linux/topology.h> @@ -137,6 +138,8 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern u64 cpu_nr_switches(int cpu); +extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); @@ -1048,9 +1051,10 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 nr_migrations; + u64 start_runtime; u64 avg_wakeup; - u64 nr_migrations; #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -1372,6 +1376,7 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif + struct perf_counter_context perf_counter_ctx; #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -2380,6 +2385,13 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +/* + * Call the function if the target task is executing on a CPU right now: + */ +extern void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info); + + #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6470f74074a..471143bf2aa 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,6 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; +struct perf_counter_hw_event; #include <linux/types.h> #include <linux/aio_abi.h> @@ -754,4 +755,8 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); + +asmlinkage long sys_perf_counter_open( + const struct perf_counter_hw_event __user *hw_event_uptr, + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif |