From fdfbbd07e91f8fe387140776f3fd94605f0c89e5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Sep 2013 07:40:39 -0700 Subject: perf: Add generic transaction flags Add a generic qualifier for transaction events, as a new sample type that returns a flag word. This is particularly useful for qualifying aborts: to distinguish aborts which happen due to asynchronous events (like conflicts caused by another CPU) versus instructions that lead to an abort. The tuning strategies are very different for those cases, so it's important to distinguish them easily and early. Since it's inconvenient and inflexible to filter for this in the kernel we report all the events out and allow some post processing in user space. The flags are based on the Intel TSX events, but should be fairly generic and mostly applicable to other HTM architectures too. In addition to various flag words there's also reserved space to report an program supplied abort code. For TSX this is used to distinguish specific classes of aborts, like a lock busy abort when doing lock elision. Flags: Elision and generic transactions (ELISION vs TRANSACTION) (HLE vs RTM on TSX; IBM etc. would likely only use TRANSACTION) Aborts caused by current thread vs aborts caused by others (SYNC vs ASYNC) Retryable transaction (RETRY) Conflicts with other threads (CONFLICT) Transaction write capacity overflow (CAPACITY WRITE) Transaction read capacity overflow (CAPACITY READ) Transactions implicitely aborted can also return an abort code. This can be used to signal specific events to the profiler. A common case is abort on lock busy in a RTM eliding library (code 0xff) To handle this case we include the TSX abort code Common example aborts in TSX would be: - Data conflict with another thread on memory read. Flags: TRANSACTION|ASYNC|CONFLICT - executing a WRMSR in a transaction. Flags: TRANSACTION|SYNC - HLE transaction in user space is too large Flags: ELISION|SYNC|CAPACITY-WRITE The only flag that is somewhat TSX specific is ELISION. This adds the perf core glue needed for reporting the new flag word out. v2: Add MEM/MISC v3: Move transaction to the end v4: Separate capacity-read/write and remove misc v5: Remove _SAMPLE. Move abort flags to 32bit. Rename transaction to txn Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379688044-14173-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 +++++ include/uapi/linux/perf_event.h | 25 ++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c8ba627c1d6..2e069d1288d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -584,6 +584,10 @@ struct perf_sample_data { struct perf_regs_user regs_user; u64 stack_user_size; u64 weight; + /* + * Transaction flags for abort events: + */ + u64 txn; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->stack_user_size = 0; data->weight = 0; data->data_src.val = 0; + data->txn = 0; } extern void perf_output_sample(struct perf_output_handle *handle, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655a5d3..da48837d617 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -136,8 +136,9 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT = 1U << 14, PERF_SAMPLE_DATA_SRC = 1U << 15, PERF_SAMPLE_IDENTIFIER = 1U << 16, + PERF_SAMPLE_TRANSACTION = 1U << 17, - PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */ }; /* @@ -180,6 +181,28 @@ enum perf_sample_regs_abi { PERF_SAMPLE_REGS_ABI_64 = 2, }; +/* + * Values for the memory transaction event qualifier, mostly for + * abort events. Multiple bits can be set. + */ +enum { + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + + PERF_TXN_MAX = (1 << 8), /* non-ABI */ + + /* bits 32..63 are reserved for the abort code */ + + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, +}; + /* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: -- cgit v1.2.3-70-g09d2 From c2d3f25dda016d9697c5416810d4528770f0a281 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 9 Oct 2013 14:08:09 +0200 Subject: uprobes: Remove the wrong __weak attribute linux/uprobes.h declares arch_uprobe_skip_sstep() as a weak function. But as there is no definition of generic version so when trying to build uprobes for an architecture that doesn't yet have a arch_uprobe_skip_sstep() implementation, the vmlinux will try to call arch_uprobe_skip_sstep() somehwere in Stupidhistan leading to a system crash. We rather want a proper link error so remove arch_uprobe_skip_sstep(). Signed-off-by: Ralf Baechle Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 06f28beed7c..e6fba627ea4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -123,7 +123,7 @@ extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); -extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { -- cgit v1.2.3-70-g09d2 From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 19:39:37 +0200 Subject: uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK uprobe_copy_process() does nothing if the child shares ->mm with the forking process, but there is a special case: CLONE_VFORK. In this case it would be more correct to do dup_utask() but avoid dup_xol(). This is not that important, the child should not unwind its stack too much, this can corrupt the parent's stack, but at least we need this to allow to ret-probe __vfork() itself. Note: in theory, it would be better to check task_pt_regs(p)->sp instead of CLONE_VFORK, we need to dup_utask() if and only if the child can return from the function called by the parent. But this needs the arch-dependant helper, and I think that nobody actually does clone(same_stack, CLONE_VM). Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 4 ++-- kernel/events/uprobes.c | 10 ++++++++-- kernel/fork.c | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e6fba627ea4..9e0d5a6fe7a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -117,7 +117,7 @@ extern void uprobe_start_dup_mmap(void); extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); -extern void uprobe_copy_process(struct task_struct *t); +extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); @@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) static inline void uprobe_free_utask(struct task_struct *t) { } -static inline void uprobe_copy_process(struct task_struct *t) +static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) { } static inline void uprobe_clear_state(struct mm_struct *mm) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9f282e14925..ae9e1d2ef25 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work) /* * Called in context of a new clone/fork from copy_process. */ -void uprobe_copy_process(struct task_struct *t) +void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; @@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t) t->utask = NULL; - if (mm == t->mm || !utask || !utask->return_instances) + if (!utask || !utask->return_instances) + return; + + if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) @@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t) if (!area) return uprobe_warn(t, "dup xol area"); + if (mm == t->mm) + return; + /* TODO: move it into the union in uprobe_task */ work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) diff --git a/kernel/fork.c b/kernel/fork.c index d3603b81246..8531609b6a8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1489,7 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); - uprobe_copy_process(p); + uprobe_copy_process(p, clone_flags); return p; -- cgit v1.2.3-70-g09d2 From 3820b4d2789f5166afdb136bb14f93166e6cfbc2 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Tue, 15 Oct 2013 17:04:16 -0400 Subject: uprobes: Move function declarations out of arch Move the function declarations from the arch headers to the common header, since only the function bodies are architecture-specific. These changes are from Vincent Rabin's uprobes patch. [ oleg: update arch/powerpc/include/asm/uprobes.h ] Signed-off-by: Rabin Vincent Signed-off-by: David A. Long Signed-off-by: Oleg Nesterov --- arch/powerpc/include/asm/uprobes.h | 7 ------- arch/x86/include/asm/uprobes.h | 7 ------- include/linux/uprobes.h | 8 ++++++++ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 23016020915..b6fc3178372 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -45,11 +45,4 @@ struct arch_uprobe_task { unsigned long saved_trap_nr; }; -extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); -extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); -extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); -extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 6e5197910fd..b20b4d68b93 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -49,11 +49,4 @@ struct arch_uprobe_task { unsigned int saved_tf; }; -extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); -extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); -extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); -extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); -extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #endif /* _ASM_UPROBES_H */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 9e0d5a6fe7a..28473e3f606 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -30,6 +30,7 @@ struct vm_area_struct; struct mm_struct; struct inode; +struct notifier_block; #ifdef CONFIG_ARCH_SUPPORTS_UPROBES # include @@ -125,6 +126,13 @@ extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); +extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); +extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); +extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3-70-g09d2 From f72d41fa902fb19a9b63028202a400b0ce497491 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 5 Nov 2013 19:50:39 +0100 Subject: uprobes: Export write_opcode() as uprobe_write_opcode() set_swbp() and set_orig_insn() are __weak, but this is pointless because write_opcode() is static. Export write_opcode() as uprobe_write_opcode() for the upcoming arm port, this way it can actually override set_swbp() and use __opcode_to_mem_arm(bpinsn) instead if UPROBE_SWBP_INSN. Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 28473e3f606..319eae70fe8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -109,6 +109,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); +extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index fbcff61b509..0ac346ae5ed 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -245,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * the architecture. If an arch has variable length instruction and the * breakpoint instruction is not of the smallest length instruction * supported by that architecture then we need to modify is_trap_at_addr and - * write_opcode accordingly. This would never be a problem for archs that - * have fixed length instructions. + * uprobe_write_opcode accordingly. This would never be a problem for archs + * that have fixed length instructions. */ /* - * write_opcode - write the opcode at a given virtual address. + * uprobe_write_opcode - write the opcode at a given virtual address. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. @@ -261,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * For mm @mm, write the opcode at @vaddr. * Return 0 (success) or a negative errno. */ -static int write_opcode(struct mm_struct *mm, unsigned long vaddr, +int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; @@ -315,7 +315,7 @@ put_old: */ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, UPROBE_SWBP_INSN); + return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); } /** @@ -330,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -577,7 +577,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (ret) goto out; - /* write_opcode() assumes we don't cross page boundary */ + /* uprobe_write_opcode() assumes we don't cross page boundary */ BUG_ON((uprobe->offset & ~PAGE_MASK) + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); -- cgit v1.2.3-70-g09d2