diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/ftrace.h | 77 | ||||
-rw-r--r-- | include/linux/ftrace_event.h | 9 | ||||
-rw-r--r-- | include/linux/genhd.h | 1 | ||||
-rw-r--r-- | include/linux/interrupt.h | 7 | ||||
-rw-r--r-- | include/linux/iocontext.h | 10 | ||||
-rw-r--r-- | include/linux/jump_label.h | 162 | ||||
-rw-r--r-- | include/linux/netdevice.h | 4 | ||||
-rw-r--r-- | include/linux/netfilter.h | 6 | ||||
-rw-r--r-- | include/linux/perf_event.h | 108 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 83 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 10 | ||||
-rw-r--r-- | include/linux/rcutree.h | 19 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | include/linux/srcu.h | 15 | ||||
-rw-r--r-- | include/linux/static_key.h | 1 | ||||
-rw-r--r-- | include/linux/tracepoint.h | 28 | ||||
-rw-r--r-- | include/linux/workqueue.h | 4 | ||||
-rw-r--r-- | include/net/sock.h | 6 | ||||
-rw-r--r-- | include/trace/events/power.h | 2 | ||||
-rw-r--r-- | include/trace/events/printk.h | 41 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 63 | ||||
-rw-r--r-- | include/trace/events/sched.h | 27 | ||||
-rw-r--r-- | include/trace/events/signal.h | 85 |
23 files changed, 598 insertions, 183 deletions
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 028e26f0bf0..72a6cabb4d5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +/* + * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are + * set in the flags member. + * + * ENABLED - set/unset when ftrace_ops is registered/unregistered + * GLOBAL - set manualy by ftrace_ops user to denote the ftrace_ops + * is part of the global tracers sharing the same filter + * via set_ftrace_* debugfs files. + * DYNAMIC - set when ftrace_ops is registered to denote dynamically + * allocated ftrace_ops which need special care + * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops + * could be controled by following calls: + * ftrace_function_local_enable + * ftrace_function_local_disable + */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_GLOBAL = 1 << 1, FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, }; struct ftrace_ops { ftrace_func_t func; struct ftrace_ops *next; unsigned long flags; + int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; @@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); +/** + * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu + * + * This function enables tracing on current cpu by decreasing + * the per cpu control variable. + * It must be called with preemption disabled and only on ftrace_ops + * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. + */ +static inline void ftrace_function_local_enable(struct ftrace_ops *ops) +{ + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + return; + + (*this_cpu_ptr(ops->disabled))--; +} + +/** + * ftrace_function_local_disable - enable controlled ftrace_ops on current cpu + * + * This function enables tracing on current cpu by decreasing + * the per cpu control variable. + * It must be called with preemption disabled and only on ftrace_ops + * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. + */ +static inline void ftrace_function_local_disable(struct ftrace_ops *ops) +{ + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + return; + + (*this_cpu_ptr(ops->disabled))++; +} + +/** + * ftrace_function_local_disabled - returns ftrace_ops disabled value + * on current cpu + * + * This function returns value of ftrace_ops::disabled on current cpu. + * It must be called with preemption disabled and only on ftrace_ops + * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. + */ +static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) +{ + WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)); + return *this_cpu_ptr(ops->disabled); +} + extern void ftrace_stub(unsigned long a0, unsigned long a1); #else /* !CONFIG_FUNCTION_TRACER */ @@ -178,12 +244,13 @@ struct dyn_ftrace { }; int ftrace_force_update(void); -void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, +int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); -void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, +int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); void ftrace_set_global_filter(unsigned char *buf, int len, int reset); void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); +void ftrace_free_filter(struct ftrace_ops *ops); int register_ftrace_command(struct ftrace_func_command *cmd); int unregister_ftrace_command(struct ftrace_func_command *cmd); @@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void); #else static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } -static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) -{ -} static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} @@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end) */ #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) +#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) +#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) +#define ftrace_free_filter(ops) do { } while (0) static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { return -ENODEV; } diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c3da42dd22b..dd478fc8f9f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -146,6 +146,10 @@ enum trace_reg { TRACE_REG_UNREGISTER, TRACE_REG_PERF_REGISTER, TRACE_REG_PERF_UNREGISTER, + TRACE_REG_PERF_OPEN, + TRACE_REG_PERF_CLOSE, + TRACE_REG_PERF_ADD, + TRACE_REG_PERF_DEL, }; struct ftrace_event_call; @@ -157,7 +161,7 @@ struct ftrace_event_class { void *perf_probe; #endif int (*reg)(struct ftrace_event_call *event, - enum trace_reg type); + enum trace_reg type, void *data); int (*define_fields)(struct ftrace_event_call *); struct list_head *(*get_fields)(struct ftrace_event_call *); struct list_head fields; @@ -165,7 +169,7 @@ struct ftrace_event_class { }; extern int ftrace_event_reg(struct ftrace_event_call *event, - enum trace_reg type); + enum trace_reg type, void *data); enum { TRACE_EVENT_FL_ENABLED_BIT, @@ -241,6 +245,7 @@ enum { FILTER_STATIC_STRING, FILTER_DYN_STRING, FILTER_PTR_STRING, + FILTER_TRACE_FN, }; #define EVENT_STORAGE_SIZE 128 diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fe23ee76858..e61d3192448 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); +extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a64b00e286f..3f830e00511 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -20,7 +20,6 @@ #include <linux/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> -#include <trace/events/irq.h> /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -456,11 +455,7 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -static inline void __raise_softirq_irqoff(unsigned int nr) -{ - trace_softirq_raise(nr); - or_softirq_pending(1UL << nr); -} +extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 119773eebe3..1a301806303 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -6,8 +6,11 @@ #include <linux/workqueue.h> enum { - ICQ_IOPRIO_CHANGED, - ICQ_CGROUP_CHANGED, + ICQ_IOPRIO_CHANGED = 1 << 0, + ICQ_CGROUP_CHANGED = 1 << 1, + ICQ_EXITED = 1 << 2, + + ICQ_CHANGED_MASK = ICQ_IOPRIO_CHANGED | ICQ_CGROUP_CHANGED, }; /* @@ -88,7 +91,7 @@ struct io_cq { struct rcu_head __rcu_head; }; - unsigned long changed; + unsigned int flags; }; /* @@ -139,6 +142,7 @@ struct io_context *get_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node); void ioc_ioprio_changed(struct io_context *ioc, int ioprio); void ioc_cgroup_changed(struct io_context *ioc); +unsigned int icq_get_changed(struct io_cq *icq); #else struct io_context; static inline void put_io_context(struct io_context *ioc) { } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5ce8b140428..c513a40510f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,22 +1,69 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +/* + * Jump label support + * + * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> + * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> + * + * Jump labels provide an interface to generate dynamic branches using + * self-modifying code. Assuming toolchain and architecture support the result + * of a "if (static_key_false(&key))" statement is a unconditional branch (which + * defaults to false - and the true block is placed out of line). + * + * However at runtime we can change the branch target using + * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key + * object and for as long as there are references all branches referring to + * that particular key will point to the (out of line) true block. + * + * Since this relies on modifying code the static_key_slow_{inc,dec}() functions + * must be considered absolute slow paths (machine wide synchronization etc.). + * OTOH, since the affected branches are unconditional their runtime overhead + * will be absolutely minimal, esp. in the default (off) case where the total + * effect is a single NOP of appropriate size. The on case will patch in a jump + * to the out-of-line block. + * + * When the control is directly exposed to userspace it is prudent to delay the + * decrement to avoid high frequency code modifications which can (and do) + * cause significant performance degradation. Struct static_key_deferred and + * static_key_slow_dec_deferred() provide for this. + * + * Lacking toolchain and or architecture support, it falls back to a simple + * conditional branch. + * + * struct static_key my_key = STATIC_KEY_INIT_TRUE; + * + * if (static_key_true(&my_key)) { + * } + * + * will result in the true case being in-line and starts the key with a single + * reference. Mixing static_key_true() and static_key_false() on the same key is not + * allowed. + * + * Not initializing the key (static data is initialized to 0s anyway) is the + * same as using STATIC_KEY_INIT_FALSE and static_key_false() is + * equivalent with static_branch(). + * +*/ + #include <linux/types.h> #include <linux/compiler.h> #include <linux/workqueue.h> #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) -struct jump_label_key { +struct static_key { atomic_t enabled; +/* Set lsb bit to 1 if branch is default true, 0 ot */ struct jump_entry *entries; #ifdef CONFIG_MODULES - struct jump_label_mod *next; + struct static_key_mod *next; #endif }; -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; unsigned long timeout; struct delayed_work work; }; @@ -34,13 +81,34 @@ struct module; #ifdef HAVE_JUMP_LABEL -#ifdef CONFIG_MODULES -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} -#else -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} -#endif +#define JUMP_LABEL_TRUE_BRANCH 1UL + +static +inline struct jump_entry *jump_label_get_entries(struct static_key *key) +{ + return (struct jump_entry *)((unsigned long)key->entries + & ~JUMP_LABEL_TRUE_BRANCH); +} -static __always_inline bool static_branch(struct jump_label_key *key) +static inline bool jump_label_get_branch_default(struct static_key *key) +{ + if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) + return true; + return false; +} + +static __always_inline bool static_key_false(struct static_key *key) +{ + return arch_static_branch(key); +} + +static __always_inline bool static_key_true(struct static_key *key) +{ + return !static_key_false(key); +} + +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) { return arch_static_branch(key); } @@ -56,21 +124,23 @@ extern void arch_jump_label_transform(struct jump_entry *entry, extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern int jump_label_text_reserved(void *start, void *end); -extern void jump_label_inc(struct jump_label_key *key); -extern void jump_label_dec(struct jump_label_key *key); -extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); -extern bool jump_label_enabled(struct jump_label_key *key); +extern void static_key_slow_inc(struct static_key *key); +extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_dec_deferred(struct static_key_deferred *key); extern void jump_label_apply_nops(struct module *mod); -extern void jump_label_rate_limit(struct jump_label_key_deferred *key, - unsigned long rl); +extern void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) #else /* !HAVE_JUMP_LABEL */ #include <linux/atomic.h> -#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} - -struct jump_label_key { +struct static_key { atomic_t enabled; }; @@ -78,30 +148,45 @@ static __always_inline void jump_label_init(void) { } -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; }; -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_false(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static __always_inline bool static_key_true(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled))) + if (likely(atomic_read(&key->enabled)) > 0) return true; return false; } -static inline void jump_label_inc(struct jump_label_key *key) +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static inline void static_key_slow_inc(struct static_key *key) { atomic_inc(&key->enabled); } -static inline void jump_label_dec(struct jump_label_key *key) +static inline void static_key_slow_dec(struct static_key *key) { atomic_dec(&key->enabled); } -static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) +static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { - jump_label_dec(&key->key); + static_key_slow_dec(&key->key); } static inline int jump_label_text_reserved(void *start, void *end) @@ -112,23 +197,30 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline bool jump_label_enabled(struct jump_label_key *key) -{ - return !!atomic_read(&key->enabled); -} - static inline int jump_label_apply_nops(struct module *mod) { return 0; } -static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, +static inline void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { } + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) + #endif /* HAVE_JUMP_LABEL */ -#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) -#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) +#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#define jump_label_enabled static_key_enabled + +static inline bool static_key_enabled(struct static_key *key) +{ + return (atomic_read(&key->enabled) > 0); +} #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c9525..7dfaae7846a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -214,8 +214,8 @@ enum { #include <linux/skbuff.h> #ifdef CONFIG_RPS -#include <linux/jump_label.h> -extern struct jump_label_key rps_needed; +#include <linux/static_key.h> +extern struct static_key rps_needed; #endif struct neighbour; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d..29734be334c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if defined(CONFIG_JUMP_LABEL) -#include <linux/jump_label.h> -extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#include <linux/static_key.h> +extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) - return static_branch(&nf_hooks_needed[pf][hook]); + return static_key_false(&nf_hooks_needed[pf][hook]); return !list_empty(&nf_hooks[pf][hook]); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index abb2776be1b..bd9f55a5958 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,11 +129,40 @@ enum perf_event_sample_format { PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_BRANCH_STACK = 1U << 11, - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ }; /* + * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * + * If the user does not pass priv level information via branch_sample_type, + * the kernel uses the event's priv level. Branch and event priv levels do + * not have to match. Branch priv level is checked for permissions. + * + * The branch types can be combined, however BRANCH_ANY covers all types + * of branches and therefore it supersedes all the other types. + */ +enum perf_branch_sample_type { + PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ + + PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ +}; + +#define PERF_SAMPLE_BRANCH_PLM_ALL \ + (PERF_SAMPLE_BRANCH_USER|\ + PERF_SAMPLE_BRANCH_KERNEL|\ + PERF_SAMPLE_BRANCH_HV) + +/* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: * @@ -163,6 +192,8 @@ enum perf_event_read_format { }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ +#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -240,6 +271,7 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; + __u64 branch_sample_type; /* enum branch_sample_type */ }; /* @@ -291,12 +323,14 @@ struct perf_event_mmap_page { __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ __u64 time_running; /* time event on cpu */ + __u32 time_mult, time_shift; + __u64 time_offset; /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[123]; /* align to 1k */ + __u64 __reserved[121]; /* align to 1k */ /* * Control data for the mmap() data buffer. @@ -456,6 +490,8 @@ enum perf_event_type { * * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW + * + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * }; */ PERF_RECORD_SAMPLE = 9, @@ -512,7 +548,7 @@ struct perf_guest_info_callbacks { #include <linux/ftrace.h> #include <linux/cpu.h> #include <linux/irq_work.h> -#include <linux/jump_label.h> +#include <linux/static_key.h> #include <linux/atomic.h> #include <asm/local.h> @@ -528,12 +564,34 @@ struct perf_raw_record { void *data; }; +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + */ struct perf_branch_entry { - __u64 from; - __u64 to; - __u64 flags; + __u64 from; + __u64 to; + __u64 mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + reserved:62; }; +/* + * branch stack layout: + * nr: number of taken branches stored in entries[] + * + * Note that nr can vary from sample to sample + * branches (to, from) are stored from most recent + * to least recent, i.e., entries[0] contains the most + * recent branch. + */ struct perf_branch_stack { __u64 nr; struct perf_branch_entry entries[0]; @@ -564,7 +622,9 @@ struct hw_perf_event { unsigned long event_base; int idx; int last_cpu; + struct hw_perf_event_extra extra_reg; + struct hw_perf_event_extra branch_reg; }; struct { /* software */ struct hrtimer hrtimer; @@ -616,6 +676,7 @@ struct pmu { struct list_head entry; struct device *dev; + const struct attribute_group **attr_groups; char *name; int type; @@ -681,6 +742,17 @@ struct pmu { * for each successful ->add() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); /* optional */ + + /* + * Will return the value for perf_event_mmap_page::index for this event, + * if no implementation is provided it will default to: event->hw.idx + 1. + */ + int (*event_idx) (struct perf_event *event); /*optional */ + + /* + * flush branch stack on context-switches (needed in cpu-wide mode) + */ + void (*flush_branch_stack) (void); }; /** @@ -850,6 +922,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct ftrace_event_call *tp_event; struct event_filter *filter; +#ifdef CONFIG_FUNCTION_TRACER + struct ftrace_ops ftrace_ops; +#endif #endif #ifdef CONFIG_CGROUP_PERF @@ -911,7 +986,8 @@ struct perf_event_context { u64 parent_gen; u64 generation; int pin_count; - int nr_cgroups; /* cgroup events present */ + int nr_cgroups; /* cgroup evts */ + int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; }; @@ -976,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); + struct perf_sample_data { u64 type; @@ -995,12 +1072,14 @@ struct perf_sample_data { u64 period; struct perf_callchain_entry *callchain; struct perf_raw_record *raw; + struct perf_branch_stack *br_stack; }; static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; + data->br_stack = NULL; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1029,7 +1108,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); @@ -1057,7 +1136,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - if (static_branch(&perf_swevent_enabled[event_id])) { + if (static_key_false(&perf_swevent_enabled[event_id])) { if (!regs) { perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; @@ -1066,12 +1145,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) } } -extern struct jump_label_key_deferred perf_sched_events; +extern struct static_key_deferred perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); } @@ -1080,7 +1159,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); } @@ -1139,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 81c04f4348e..937217425c4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -190,6 +190,33 @@ extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +/** + * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers + * @a: Code that RCU needs to pay attention to. + * + * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden + * in the inner idle loop, that is, between the rcu_idle_enter() and + * the rcu_idle_exit() -- RCU will happily ignore any such read-side + * critical sections. However, things like powertop need tracepoints + * in the inner idle loop. + * + * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) + * will tell RCU that it needs to pay attending, invoke its argument + * (in this example, a call to the do_something_with_RCU() function), + * and then tell RCU to go back to ignoring this CPU. It is permissible + * to nest RCU_NONIDLE() wrappers, but the nesting level is currently + * quite limited. If deeper nesting is required, it will be necessary + * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. + * + * This macro may be used from process-level code only. + */ +#define RCU_NONIDLE(a) \ + do { \ + rcu_idle_exit(); \ + do { a; } while (0); \ + rcu_idle_enter(); \ + } while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. @@ -226,6 +253,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) +bool rcu_lockdep_current_cpu_online(void); +#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ +static inline bool rcu_lockdep_current_cpu_online(void) +{ + return 1; +} +#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_PROVE_RCU @@ -239,13 +275,11 @@ static inline int rcu_is_cpu_idle(void) static inline void rcu_lock_acquire(struct lockdep_map *map) { - WARN_ON_ONCE(rcu_is_cpu_idle()); lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) { - WARN_ON_ONCE(rcu_is_cpu_idle()); lock_release(map, 1, _THIS_IP_); } @@ -270,6 +304,9 @@ extern int debug_lockdep_rcu_enabled(void); * occur in the same context, for example, it is illegal to invoke * rcu_read_unlock() in process context if the matching rcu_read_lock() * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. */ static inline int rcu_read_lock_held(void) { @@ -277,6 +314,8 @@ static inline int rcu_read_lock_held(void) return 1; if (rcu_is_cpu_idle()) return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; return lock_is_held(&rcu_lock_map); } @@ -313,6 +352,9 @@ extern int rcu_read_lock_bh_held(void); * notice an extended quiescent state to other CPUs that started a grace * period. Otherwise we would delay any grace period as long as we run in * the idle task. + * + * Similarly, we avoid claiming an SRCU read lock held if the current + * CPU is offline. */ #ifdef CONFIG_PREEMPT_COUNT static inline int rcu_read_lock_sched_held(void) @@ -323,6 +365,8 @@ static inline int rcu_read_lock_sched_held(void) return 1; if (rcu_is_cpu_idle()) return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); @@ -381,8 +425,22 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) +static inline void rcu_preempt_sleep_check(void) +{ + rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), + "Illegal context switch in RCU read-side " + "critical section"); +} +#else /* #ifdef CONFIG_PROVE_RCU */ +static inline void rcu_preempt_sleep_check(void) +{ +} +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + #define rcu_sleep_check() \ do { \ + rcu_preempt_sleep_check(); \ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ "Illegal context switch in RCU-bh" \ " read-side critical section"); \ @@ -470,6 +528,13 @@ extern int rcu_my_thread_group_empty(void); * NULL. Although rcu_access_pointer() may also be used in cases where * update-side locks prevent the value of the pointer from changing, you * should instead use rcu_dereference_protected() for this use case. + * + * It is also permissible to use rcu_access_pointer() when read-side + * access to the pointer was removed at least one grace period ago, as + * is the case in the context of the RCU callback that is freeing up + * the data, or after a synchronize_rcu() returns. This can be useful + * when tearing down multi-linked structures after a grace period + * has elapsed. */ #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) @@ -659,6 +724,8 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock() used illegally while idle"); } /* @@ -678,6 +745,8 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock() used illegally while idle"); rcu_lock_release(&rcu_lock_map); __release(RCU); __rcu_read_unlock(); @@ -705,6 +774,8 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock_bh() used illegally while idle"); } /* @@ -714,6 +785,8 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); @@ -737,6 +810,8 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ @@ -753,6 +828,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); @@ -841,7 +918,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) /* See the kfree_rcu() header comment. */ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); - call_rcu(head, (rcu_callback)offset); + kfree_call_rcu(head, (rcu_callback)offset); } /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 00b7a5e493d..e93df77176d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,13 +27,9 @@ #include <linux/cache.h> -#ifdef CONFIG_RCU_BOOST static inline void rcu_init(void) { } -#else /* #ifdef CONFIG_RCU_BOOST */ -void rcu_init(void); -#endif /* #else #ifdef CONFIG_RCU_BOOST */ static inline void rcu_barrier_bh(void) { @@ -83,6 +79,12 @@ static inline void synchronize_sched_expedited(void) synchronize_sched(); } +static inline void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + call_rcu(head, func); +} + #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 67458468f1a..e8ee5dd0854 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -61,6 +61,24 @@ extern void synchronize_rcu_bh(void); extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); +void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); + +/** + * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period + * + * Wait for an RCU-bh grace period to elapse, but use a "big hammer" + * approach to force the grace period to end quickly. This consumes + * significant time on all CPUs and is unfriendly to real-time workloads, + * so is thus not recommended for any sort of common-case code. In fact, + * if you are using synchronize_rcu_bh_expedited() in a loop, please + * restructure your code to batch your updates, and then use a single + * synchronize_rcu_bh() instead. + * + * Note that it is illegal to call this function while holding any lock + * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal + * to call this function from a CPU-hotplug notifier. Failing to observe + * these restriction will result in deadlock. + */ static inline void synchronize_rcu_bh_expedited(void) { synchronize_sched_expedited(); @@ -83,6 +101,7 @@ extern void rcu_sched_force_quiescent_state(void); /* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { + might_sleep(); /* Check for RCU read-side critical section. */ return num_online_cpus() == 1; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cd002cc9e6..e074e1e54f8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1335,6 +1335,11 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; +#ifdef CONFIG_GENERIC_HARDIRQS + /* IRQ handler threads */ + unsigned irq_thread:1; +#endif + pid_t pid; pid_t tgid; @@ -1443,11 +1448,6 @@ struct task_struct { * mempolicy */ spinlock_t alloc_lock; -#ifdef CONFIG_GENERIC_HARDIRQS - /* IRQ handler threads */ - struct irqaction *irqaction; -#endif - /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; @@ -1879,8 +1879,7 @@ extern void task_clear_jobctl_pending(struct task_struct *task, #ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ static inline void rcu_copy_process(struct task_struct *p) { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e1b005918bb..d3d5fa54f25 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -99,15 +99,18 @@ long srcu_batches_completed(struct srcu_struct *sp); * power mode. This way we can notice an extended quiescent state to * other CPUs that started a grace period. Otherwise we would delay any * grace period as long as we run in the idle task. + * + * Similarly, we avoid claiming an SRCU read lock held if the current + * CPU is offline. */ static inline int srcu_read_lock_held(struct srcu_struct *sp) { - if (rcu_is_cpu_idle()) - return 0; - if (!debug_lockdep_rcu_enabled()) return 1; - + if (rcu_is_cpu_idle()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; return lock_is_held(&sp->dep_map); } @@ -169,6 +172,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) int retval = __srcu_read_lock(sp); rcu_lock_acquire(&(sp)->dep_map); + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "srcu_read_lock() used illegally while idle"); return retval; } @@ -182,6 +187,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp) { + rcu_lockdep_assert(!rcu_is_cpu_idle(), + "srcu_read_unlock() used illegally while idle"); rcu_lock_release(&(sp)->dep_map); __srcu_read_unlock(sp, idx); } diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 00000000000..27bd3f8a085 --- /dev/null +++ b/include/linux/static_key.h @@ -0,0 +1 @@ +#include <linux/jump_label.h> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index df0a779c1bb..bd96ecd0e05 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,7 +17,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> -#include <linux/jump_label.h> +#include <linux/static_key.h> struct module; struct tracepoint; @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct jump_label_key key; + struct static_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -114,7 +114,7 @@ static inline void tracepoint_synchronize_unregister(void) * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". */ -#define __DO_TRACE(tp, proto, args, cond) \ +#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ @@ -122,6 +122,7 @@ static inline void tracepoint_synchronize_unregister(void) \ if (!(cond)) \ return; \ + prercu; \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -132,6 +133,7 @@ static inline void tracepoint_synchronize_unregister(void) } while ((++it_func_ptr)->func); \ } \ rcu_read_unlock_sched_notrace(); \ + postrcu; \ } while (0) /* @@ -139,15 +141,25 @@ static inline void tracepoint_synchronize_unregister(void) * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ + if (static_key_false(&__tracepoint_##name.key)) \ + __DO_TRACE(&__tracepoint_##name, \ + TP_PROTO(data_proto), \ + TP_ARGS(data_args), \ + TP_CONDITION(cond),,); \ + } \ + static inline void trace_##name##_rcuidle(proto) \ + { \ if (static_branch(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ - TP_CONDITION(cond)); \ + TP_CONDITION(cond), \ + rcu_idle_exit(), \ + rcu_idle_enter()); \ } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ @@ -176,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ static struct tracepoint * const __tracepoint_ptr_##name __used \ __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; @@ -190,9 +202,11 @@ static inline void tracepoint_synchronize_unregister(void) EXPORT_SYMBOL(__tracepoint_##name) #else /* !CONFIG_TRACEPOINTS */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ + static inline void trace_##name##_rcuidle(proto) \ + { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index eb8b9f15f2e..af155450cab 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -289,12 +289,16 @@ enum { * * system_freezable_wq is equivalent to system_wq except that it's * freezable. + * + * system_nrt_freezable_wq is equivalent to system_nrt_wq except that + * it's freezable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_nrt_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf02..dcde2d9268c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,7 +55,7 @@ #include <linux/uaccess.h> #include <linux/memcontrol.h> #include <linux/res_counter.h> -#include <linux/jump_label.h> +#include <linux/static_key.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> @@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #endif /* SOCK_REFCNT_DEBUG */ #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) -extern struct jump_label_key memcg_socket_limit_enabled; +extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) { return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); } -#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) +#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) #else #define mem_cgroup_sockets_enabled 0 static inline struct cg_proto *parent_cg_proto(struct proto *proto, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 1bcc2a8c00e..14b38940062 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -151,6 +151,8 @@ enum { events get removed */ static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; static inline void trace_power_end(u64 cpuid) {}; +static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {}; +static inline void trace_power_end_rcuidle(u64 cpuid) {}; static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h new file mode 100644 index 00000000000..94ec79cc011 --- /dev/null +++ b/include/trace/events/printk.h @@ -0,0 +1,41 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM printk + +#if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PRINTK_H + +#include <linux/tracepoint.h> + +TRACE_EVENT_CONDITION(console, + TP_PROTO(const char *log_buf, unsigned start, unsigned end, + unsigned log_buf_len), + + TP_ARGS(log_buf, start, end, log_buf_len), + + TP_CONDITION(start != end), + + TP_STRUCT__entry( + __dynamic_array(char, msg, end - start + 1) + ), + + TP_fast_assign( + if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) { + memcpy(__get_dynamic_array(msg), + log_buf + (start & (log_buf_len - 1)), + log_buf_len - (start & (log_buf_len - 1))); + memcpy((char *)__get_dynamic_array(msg) + + log_buf_len - (start & (log_buf_len - 1)), + log_buf, end & (log_buf_len - 1)); + } else + memcpy(__get_dynamic_array(msg), + log_buf + (start & (log_buf_len - 1)), + end - start); + ((char *)__get_dynamic_array(msg))[end - start] = 0; + ), + + TP_printk("%s", __get_str(msg)) +); +#endif /* _TRACE_PRINTK_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d2d88bed891..337099783f3 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -313,19 +313,22 @@ TRACE_EVENT(rcu_prep_idle, /* * Tracepoint for the registration of a single RCU callback function. * The first argument is the type of RCU, the second argument is - * a pointer to the RCU callback itself, and the third element is the - * new RCU callback queue length for the current CPU. + * a pointer to the RCU callback itself, the third element is the + * number of lazy callbacks queued, and the fourth element is the + * total number of callbacks queued. */ TRACE_EVENT(rcu_callback, - TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy, + long qlen), - TP_ARGS(rcuname, rhp, qlen), + TP_ARGS(rcuname, rhp, qlen_lazy, qlen), TP_STRUCT__entry( __field(char *, rcuname) __field(void *, rhp) __field(void *, func) + __field(long, qlen_lazy) __field(long, qlen) ), @@ -333,11 +336,13 @@ TRACE_EVENT(rcu_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%pf %ld", - __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) + TP_printk("%s rhp=%p func=%pf %ld/%ld", + __entry->rcuname, __entry->rhp, __entry->func, + __entry->qlen_lazy, __entry->qlen) ); /* @@ -345,20 +350,21 @@ TRACE_EVENT(rcu_callback, * kfree() form. The first argument is the RCU type, the second argument * is a pointer to the RCU callback, the third argument is the offset * of the callback within the enclosing RCU-protected data structure, - * and the fourth argument is the new RCU callback queue length for the - * current CPU. + * the fourth argument is the number of lazy callbacks queued, and the + * fifth argument is the total number of callbacks queued. */ TRACE_EVENT(rcu_kfree_callback, TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, - long qlen), + long qlen_lazy, long qlen), - TP_ARGS(rcuname, rhp, offset, qlen), + TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen), TP_STRUCT__entry( __field(char *, rcuname) __field(void *, rhp) __field(unsigned long, offset) + __field(long, qlen_lazy) __field(long, qlen) ), @@ -366,41 +372,45 @@ TRACE_EVENT(rcu_kfree_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%ld %ld", + TP_printk("%s rhp=%p func=%ld %ld/%ld", __entry->rcuname, __entry->rhp, __entry->offset, - __entry->qlen) + __entry->qlen_lazy, __entry->qlen) ); /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, - * the second is the total number of callbacks (including those that - * are not yet ready to be invoked), and the third argument is the - * current RCU-callback batch limit. + * the second is the number of lazy callbacks queued, the third is + * the total number of callbacks queued, and the fourth argument is + * the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(char *rcuname, long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), - TP_ARGS(rcuname, qlen, blimit), + TP_ARGS(rcuname, qlen_lazy, qlen, blimit), TP_STRUCT__entry( __field(char *, rcuname) + __field(long, qlen_lazy) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( __entry->rcuname = rcuname; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("%s CBs=%ld bl=%d", - __entry->rcuname, __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld/%ld bl=%d", + __entry->rcuname, __entry->qlen_lazy, __entry->qlen, + __entry->blimit) ); /* @@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read, #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) -#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ + qsmask) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ + grplo, grphi, gp_tasks) do { } \ + while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) #define trace_rcu_prep_idle(reason) do { } while (0) -#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) -#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) -#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ + do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \ + do { } while (0) #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e33ed1bfa11..fbc7b1ad929 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/tracepoint.h> +#include <linux/binfmts.h> /* * Tracepoint for calling kthread_stop, performed to end a kthread: @@ -276,6 +277,32 @@ TRACE_EVENT(sched_process_fork, ); /* + * Tracepoint for exec: + */ +TRACE_EVENT(sched_process_exec, + + TP_PROTO(struct task_struct *p, pid_t old_pid, + struct linux_binprm *bprm), + + TP_ARGS(p, old_pid, bprm), + + TP_STRUCT__entry( + __string( filename, bprm->filename ) + __field( pid_t, pid ) + __field( pid_t, old_pid ) + ), + + TP_fast_assign( + __assign_str(filename, bprm->filename); + __entry->pid = p->pid; + __entry->old_pid = p->pid; + ), + + TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), + __entry->pid, __entry->old_pid) +); + +/* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index 17df43464df..39a8a430d90 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -23,11 +23,23 @@ } \ } while (0) +#ifndef TRACE_HEADER_MULTI_READ +enum { + TRACE_SIGNAL_DELIVERED, + TRACE_SIGNAL_IGNORED, + TRACE_SIGNAL_ALREADY_PENDING, + TRACE_SIGNAL_OVERFLOW_FAIL, + TRACE_SIGNAL_LOSE_INFO, +}; +#endif + /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct + * @group: shared or private + * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, @@ -37,9 +49,10 @@ */ TRACE_EVENT(signal_generate, - TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task, + int group, int result), - TP_ARGS(sig, info, task), + TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) @@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate, __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) + __field( int, group ) + __field( int, result ) ), TP_fast_assign( @@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate, TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; + __entry->group = group; + __entry->result = result; ), - TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, - __entry->comm, __entry->pid) + __entry->comm, __entry->pid, __entry->group, + __entry->result) ); /** @@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver, __entry->sa_handler, __entry->sa_flags) ); -DECLARE_EVENT_CLASS(signal_queue_overflow, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info), - - TP_STRUCT__entry( - __field( int, sig ) - __field( int, group ) - __field( int, errno ) - __field( int, code ) - ), - - TP_fast_assign( - __entry->sig = sig; - __entry->group = group; - TP_STORE_SIGINFO(__entry, info); - ), - - TP_printk("sig=%d group=%d errno=%d code=%d", - __entry->sig, __entry->group, __entry->errno, __entry->code) -); - -/** - * signal_overflow_fail - called when signal queue is overflow - * @sig: signal number - * @group: signal to process group or not (bool) - * @info: pointer to struct siginfo - * - * Kernel fails to generate 'sig' signal with 'info' siginfo, because - * siginfo queue is overflow, and the signal is dropped. - * 'group' is not 0 if the signal will be sent to a process group. - * 'sig' is always one of RT signals. - */ -DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info) -); - -/** - * signal_lose_info - called when siginfo is lost - * @sig: signal number - * @group: signal to process group or not (bool) - * @info: pointer to struct siginfo - * - * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo - * queue is overflow. - * 'group' is not 0 if the signal will be sent to a process group. - * 'sig' is always one of non-RT signals. - */ -DEFINE_EVENT(signal_queue_overflow, signal_lose_info, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info) -); - #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ |