From ddb69f276c4af8bb47ad4f24a72f72ddf58c228a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 15:16:22 +0200 Subject: uprobes/x86: Fold prepare_fixups() into arch_uprobe_analyze_insn() No functional changes, preparation. Shift the code from prepare_fixups() to arch_uprobe_analyze_insn() with the following modifications: - Do not call insn_get_opcode() again, it was already called by validate_insn_bits(). - Move "case 0xea" up. This way "case 0xff" can fall through to default case. - change "case 0xff" to use the nested "switch (MODRM_REG)", this way the code looks a bit simpler. - Make the comments look consistent. While at it, kill the initialization of rip_rela_target_address and ->fixups, we can rely on kzalloc(). We will add the new members into arch_uprobe, it would be better to assume that everything is zero by default. TODO: cleanup/fix the mess in validate_insn_bits() paths: - validate_insn_64bits() and validate_insn_32bits() should be unified. - "ifdef" is not used consistently; if good_insns_64 depends on CONFIG_X86_64, then probably good_insns_32 should depend on CONFIG_X86_32/EMULATION - the usage of mm->context.ia32_compat looks wrong if the task is TIF_X32. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 110 ++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 63 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 2ed845928b5..098e56ec795 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -53,7 +53,7 @@ #define OPCODE1(insn) ((insn)->opcode.bytes[0]) #define OPCODE2(insn) ((insn)->opcode.bytes[1]) #define OPCODE3(insn) ((insn)->opcode.bytes[2]) -#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) +#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) return -ENOTSUPP; } -/* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, and - * annotate arch_uprobe->fixups accordingly. To start with, - * arch_uprobe->fixups is either zero or it reflects rip-related fixups. - */ -static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) -{ - bool fix_ip = true, fix_call = false; /* defaults */ - int reg; - - insn_get_opcode(insn); /* should be a nop */ - - switch (OPCODE1(insn)) { - case 0x9d: - /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; - break; - case 0xc3: /* ret/lret */ - case 0xcb: - case 0xc2: - case 0xca: - /* ip is correct */ - fix_ip = false; - break; - case 0xe8: /* call relative - Fix return addr */ - fix_call = true; - break; - case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xff: - insn_get_modrm(insn); - reg = MODRM_REG(insn); - if (reg == 2 || reg == 3) { - /* call or lcall, indirect */ - /* Fix return addr; ip is correct. */ - fix_call = true; - fix_ip = false; - } else if (reg == 4 || reg == 5) { - /* jmp or ljmp, indirect */ - /* ip is correct. 
*/ - fix_ip = false; - } - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; - break; - default: - break; - } - if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; -} - #ifdef CONFIG_X86_64 /* * If arch_uprobe->insn doesn't use rip-relative addressing, return @@ -318,7 +261,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins if (mm->context.ia32_compat) return; - auprobe->rip_rela_target_address = 0x0; if (!insn_rip_relative(insn)) return; @@ -421,16 +363,58 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, */ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { - int ret; struct insn insn; + bool fix_ip = true, fix_call = false; + int ret; - auprobe->fixups = 0; ret = validate_insn_bits(auprobe, mm, &insn); - if (ret != 0) + if (ret) return ret; + /* + * Figure out which fixups arch_uprobe_post_xol() will need to perform, + * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups + * is either zero or it reflects rip-related fixups. + */ handle_riprel_insn(auprobe, mm, &insn); - prepare_fixups(auprobe, &insn); + + switch (OPCODE1(&insn)) { + case 0x9d: /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; + case 0xc3: /* ret or lret -- ip is correct */ + case 0xcb: + case 0xc2: + case 0xca: + fix_ip = false; + break; + case 0xe8: /* call relative - Fix return addr */ + fix_call = true; + break; + case 0x9a: /* call absolute - Fix return addr, not ip */ + fix_call = true; + fix_ip = false; + break; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip = false; + break; + case 0xff: + insn_get_modrm(&insn); + switch (MODRM_REG(&insn)) { + case 2: case 3: /* call or lcall, indirect */ + fix_call = true; + case 4: case 5: /* jmp or ljmp, indirect */ + fix_ip = false; + } + break; + default: + break; + } + + if (fix_ip) + auprobe->fixups |= UPROBE_FIX_IP; + if (fix_call) + auprobe->fixups |= UPROBE_FIX_CALL; return 0; } -- cgit v1.2.3-70-g09d2 From 59078d4b96bb548f97d9fb429b929a289e4884d9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 18:09:36 +0200 Subject: uprobes/x86: Kill the "ia32_compat" check in handle_riprel_insn(), remove "mm" arg Kill the "mm->context.ia32_compat" check in handle_riprel_insn(), if it is true insn_rip_relative() must return false. validate_insn_bits() passed "ia32_compat" as !x86_64 to insn_init(), and insn_rip_relative() checks insn->x86_64. Also, remove the no longer needed "struct mm_struct *mm" argument and the unnecessary "return" at the end. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 098e56ec795..963c121c030 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -253,14 +253,11 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) * - The displacement is always 4 bytes. 
*/ static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; - if (mm->context.ia32_compat) - return; - if (!insn_rip_relative(insn)) return; @@ -314,7 +311,6 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins cursor++; memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); } - return; } static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) @@ -343,7 +339,7 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, return validate_insn_64bits(auprobe, insn); } #else /* 32-bit: */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) { /* No RIP-relative addressing on 32-bit */ } @@ -376,7 +372,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups * is either zero or it reflects rip-related fixups. */ - handle_riprel_insn(auprobe, mm, &insn); + handle_riprel_insn(auprobe, &insn); switch (OPCODE1(&insn)) { case 0x9d: /* popf */ -- cgit v1.2.3-70-g09d2 From d20737c07a1063d681fe9fb86f3da369da1edab7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 18:35:09 +0200 Subject: uprobes/x86: Gather "riprel" functions together Cosmetic. Move pre_xol_rip_insn() and handle_riprel_post_xol() up to the closely related handle_riprel_insn(). This way it is simpler to read and understand this code, and this lessens the number of ifdef's. While at it, update the comment in handle_riprel_post_xol() as Jim suggested. TODO: rename them somehow to make the naming consistent. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 118 +++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 65 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 963c121c030..c52c30fa787 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -313,6 +313,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) } } +/* + * If we're emulating a rip-relative instruction, save the contents + * of the scratch register and store the target address in that register. + */ +static void +pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ + if (auprobe->fixups & UPROBE_FIX_RIP_AX) { + autask->saved_scratch_register = regs->ax; + regs->ax = current->utask->vaddr; + regs->ax += auprobe->rip_rela_target_address; + } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { + autask->saved_scratch_register = regs->cx; + regs->cx = current->utask->vaddr; + regs->cx += auprobe->rip_rela_target_address; + } +} + +static void +handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +{ + if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { + struct arch_uprobe_task *autask; + + autask = ¤t->utask->autask; + if (auprobe->fixups & UPROBE_FIX_RIP_AX) + regs->ax = autask->saved_scratch_register; + else + regs->cx = autask->saved_scratch_register; + + /* + * The original instruction includes a displacement, and so + * is 4 bytes longer than what we've just single-stepped. 
+ * Caller may need to apply other fixups to handle stuff + * like "jmpq *...(%rip)" and "callq *...(%rip)". + */ + if (correction) + *correction += 4; + } +} + static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) { insn_init(insn, auprobe->insn, true); @@ -339,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, return validate_insn_64bits(auprobe, insn); } #else /* 32-bit: */ +/* + * No RIP-relative addressing on 32-bit + */ static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) { - /* No RIP-relative addressing on 32-bit */ +} +static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, + struct arch_uprobe_task *autask) +{ +} +static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, + long *correction) +{ } static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) @@ -415,34 +467,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return 0; } -#ifdef CONFIG_X86_64 -/* - * If we're emulating a rip-relative instruction, save the contents - * of the scratch register and store the target address in that register. - */ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) -{ - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; - } -} -#else -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) -{ - /* No RIP-relative addressing on 32-bit */ -} -#endif - /* * arch_uprobe_pre_xol - prepare to execute out of line. * @auprobe: the probepoint information. @@ -492,42 +516,6 @@ static int adjust_ret_addr(unsigned long sp, long correction) return 0; } -#ifdef CONFIG_X86_64 -static bool is_riprel_insn(struct arch_uprobe *auprobe) -{ - return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); -} - -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) -{ - if (is_riprel_insn(auprobe)) { - struct arch_uprobe_task *autask; - - autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; - - /* - * The original instruction includes a displacement, and so - * is 4 bytes longer than what we've just single-stepped. - * Fall through to handle stuff like "jmpq *...(%rip)" and - * "callq *...(%rip)". - */ - if (correction) - *correction += 4; - } -} -#else -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) -{ - /* No RIP-relative addressing on 32-bit */ -} -#endif - /* * If xol insn itself traps and generates a signal(Say, * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped -- cgit v1.2.3-70-g09d2 From 34e7317d6ae8f6111ac449444f22e14f4a14ebfd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 19:38:09 +0200 Subject: uprobes/x86: move the UPROBE_FIX_{RIP,IP,CALL} code at the end of pre/post hooks No functional changes. Preparation to simplify the review of the next change. 
Just reorder the code in arch_uprobe_pre/post_xol() functions so that UPROBE_FIX_{RIP_*,IP,CALL} logic goes to the end. Also change arch_uprobe_pre_xol() to use utask instead of autask, to make the code more symmetrical with arch_uprobe_post_xol(). Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index c52c30fa787..3bb4198aa58 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -474,19 +474,18 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, */ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - struct arch_uprobe_task *autask; + struct uprobe_task *utask = current->utask; - autask = ¤t->utask->autask; - autask->saved_trap_nr = current->thread.trap_nr; + regs->ip = utask->xol_vaddr; + utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; - regs->ip = current->utask->xol_vaddr; - pre_xol_rip_insn(auprobe, regs, autask); - autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); + utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF); regs->flags |= X86_EFLAGS_TF; if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); + pre_xol_rip_insn(auprobe, regs, &utask->autask); return 0; } @@ -560,22 +559,13 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - struct uprobe_task *utask; + struct uprobe_task *utask = current->utask; long correction; int result = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); - utask = current->utask; current->thread.trap_nr = utask->autask.saved_trap_nr; - correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) - regs->ip += correction; - - if (auprobe->fixups & UPROBE_FIX_CALL) - result = adjust_ret_addr(regs->sp, correction); - /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need @@ -586,6 +576,14 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; + correction = (long)(utask->vaddr - utask->xol_vaddr); + handle_riprel_post_xol(auprobe, regs, &correction); + if (auprobe->fixups & UPROBE_FIX_IP) + regs->ip += correction; + + if (auprobe->fixups & UPROBE_FIX_CALL) + result = adjust_ret_addr(regs->sp, correction); + return result; } -- cgit v1.2.3-70-g09d2 From 8ad8e9d3fd64f101eed6652964670672d699e563 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 21:01:31 +0200 Subject: uprobes/x86: Introduce uprobe_xol_ops and arch_uprobe->ops Introduce arch_uprobe->ops pointing to the "struct uprobe_xol_ops", move the current UPROBE_FIX_{RIP*,IP,CALL} code into the default set of methods and change arch_uprobe_pre/post_xol() accordingly. This way we can add the new uprobe_xol_ops's to handle the insns which need the special processing (rip-relative jmp/call at least). 
Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Reviewed-by: Masami Hiramatsu --- arch/x86/include/asm/uprobes.h | 7 ++- arch/x86/kernel/uprobes.c | 107 ++++++++++++++++++++++++++--------------- 2 files changed, 74 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 3087ea9c5f2..9f8210bcbb4 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -33,12 +33,17 @@ typedef u8 uprobe_opcode_t; #define UPROBE_SWBP_INSN 0xcc #define UPROBE_SWBP_INSN_SIZE 1 +struct uprobe_xol_ops; + struct arch_uprobe { - u16 fixups; union { u8 insn[MAX_UINSN_BYTES]; u8 ixol[MAX_UINSN_BYTES]; }; + + u16 fixups; + const struct uprobe_xol_ops *ops; + #ifdef CONFIG_X86_64 unsigned long rip_rela_target_address; #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3bb4198aa58..13ad8a38c2d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -402,6 +402,64 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, } #endif /* CONFIG_X86_64 */ +struct uprobe_xol_ops { + bool (*emulate)(struct arch_uprobe *, struct pt_regs *); + int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); + int (*post_xol)(struct arch_uprobe *, struct pt_regs *); +}; + +static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + return 0; +} + +/* + * Adjust the return address pushed by a call insn executed out of line. + */ +static int adjust_ret_addr(unsigned long sp, long correction) +{ + int rasize, ncopied; + long ra = 0; + + if (is_ia32_task()) + rasize = 4; + else + rasize = 8; + + ncopied = copy_from_user(&ra, (void __user *)sp, rasize); + if (unlikely(ncopied)) + return -EFAULT; + + ra += correction; + ncopied = copy_to_user((void __user *)sp, &ra, rasize); + if (unlikely(ncopied)) + return -EFAULT; + + return 0; +} + +static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + long correction = (long)(utask->vaddr - utask->xol_vaddr); + int ret = 0; + + handle_riprel_post_xol(auprobe, regs, &correction); + if (auprobe->fixups & UPROBE_FIX_IP) + regs->ip += correction; + + if (auprobe->fixups & UPROBE_FIX_CALL) + ret = adjust_ret_addr(regs->sp, correction); + + return ret; +} + +static struct uprobe_xol_ops default_xol_ops = { + .pre_xol = default_pre_xol_op, + .post_xol = default_post_xol_op, +}; + /** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @mm: the probed address space. @@ -464,6 +522,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, if (fix_call) auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->ops = &default_xol_ops; return 0; } @@ -485,33 +544,8 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - pre_xol_rip_insn(auprobe, regs, &utask->autask); - return 0; -} - -/* - * This function is called by arch_uprobe_post_xol() to adjust the return - * address pushed by a call instruction executed out of line. 
- */ -static int adjust_ret_addr(unsigned long sp, long correction) -{ - int rasize, ncopied; - long ra = 0; - - if (is_ia32_task()) - rasize = 4; - else - rasize = 8; - - ncopied = copy_from_user(&ra, (void __user *)sp, rasize); - if (unlikely(ncopied)) - return -EFAULT; - - ra += correction; - ncopied = copy_to_user((void __user *)sp, &ra, rasize); - if (unlikely(ncopied)) - return -EFAULT; - + if (auprobe->ops->pre_xol) + return auprobe->ops->pre_xol(auprobe, regs); return 0; } @@ -560,11 +594,8 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - long correction; - int result = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP @@ -576,15 +607,9 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; - correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) - regs->ip += correction; - - if (auprobe->fixups & UPROBE_FIX_CALL) - result = adjust_ret_addr(regs->sp, correction); - - return result; + if (auprobe->ops->post_xol) + return auprobe->ops->post_xol(auprobe, regs); + return 0; } /* callback routine for handling exceptions. */ @@ -642,6 +667,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; + if (auprobe->ops->emulate) + return auprobe->ops->emulate(auprobe, regs); + + /* TODO: move this code into ->emulate() hook */ for (i = 0; i < MAX_UINSN_BYTES; i++) { if (auprobe->insn[i] == 0x66) continue; -- cgit v1.2.3-70-g09d2 From e55848a4f8ee52465771983e144f0c3337776eda Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 31 Mar 2014 17:24:14 +0200 Subject: uprobes/x86: Conditionalize the usage of handle_riprel_insn() arch_uprobe_analyze_insn() calls handle_riprel_insn() at the start, but only "0xff" and "default" cases need the UPROBE_FIX_RIP_ logic. Move the callsite into "default" case and change the "0xff" case to fall-through. We are going to add the various hooks to handle the rip-relative jmp/call instructions (and more), we need this change to enforce the fact that the new code can not conflict with is_riprel_insn() logic which, after this change, can only be used by default_xol_ops. Note: arch_uprobe_abort_xol() still calls handle_riprel_post_xol() directly. This is fine unless another _xol_ops we may add later will need to reuse "UPROBE_FIX_RIP_AX|UPROBE_FIX_RIP_CX" bits in ->fixup. In this case we can add uprobe_xol_ops->abort() hook, which (perhaps) we will need anyway in the long term. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston Reviewed-by: Masami Hiramatsu --- arch/x86/kernel/uprobes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 13ad8a38c2d..08cdb82815f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -482,8 +482,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups * is either zero or it reflects rip-related fixups. 
*/ - handle_riprel_insn(auprobe, &insn); - switch (OPCODE1(&insn)) { case 0x9d: /* popf */ auprobe->fixups |= UPROBE_FIX_SETF; break; @@ -512,9 +510,9 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, case 4: case 5: /* jmp or ljmp, indirect */ fix_ip = false; } - break; + /* fall through */ default: - break; + handle_riprel_insn(auprobe, &insn); } if (fix_ip) -- cgit v1.2.3-70-g09d2 From 014940bad8e46ca7bd0483f760f9cba60088a3d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 3 Apr 2014 20:20:10 +0200 Subject: uprobes/x86: Send SIGILL if arch_uprobe_post_xol() fails Currently the error from arch_uprobe_post_xol() is silently ignored. This doesn't look good and it can lead to hard-to-debug problems. 1. Change handle_singlestep() to loudly complain and send SIGILL. Note: this only affects x86; ppc/arm can't fail. 2. Change arch_uprobe_post_xol() to call arch_uprobe_abort_xol() and avoid TF games if it is going to return an error. This can help to analyze the problem; if nothing else, we should not report ->ip = xol_slot in the core-file. Note: this means that handle_riprel_post_xol() can be called twice, but this is fine because it is idempotent. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 16 ++++++++++++---- kernel/events/uprobes.c | 8 +++++++- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 08cdb82815f..e72903eacd4 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -594,6 +594,15 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) struct uprobe_task *utask = current->utask; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + + if (auprobe->ops->post_xol) { + int err = auprobe->ops->post_xol(auprobe, regs); + if (err) { + arch_uprobe_abort_xol(auprobe, regs); + return err; + } + } + current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP @@ -605,8 +614,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else if (!(auprobe->fixups & UPROBE_FIX_SETF)) regs->flags &= ~X86_EFLAGS_TF; - if (auprobe->ops->post_xol) - return auprobe->ops->post_xol(auprobe, regs); return 0; } @@ -641,8 +648,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, so reset the instruction pointer to its - * probed address. + * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. + * Reset the instruction pointer to its probed address for the potential + * restart or for post mortem analysis. 
*/ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ea2a7c0728b..d1edc5e6fd0 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1867,10 +1867,11 @@ out: static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) { struct uprobe *uprobe; + int err = 0; uprobe = utask->active_uprobe; if (utask->state == UTASK_SSTEP_ACK) - arch_uprobe_post_xol(&uprobe->arch, regs); + err = arch_uprobe_post_xol(&uprobe->arch, regs); else if (utask->state == UTASK_SSTEP_TRAPPED) arch_uprobe_abort_xol(&uprobe->arch, regs); else @@ -1884,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) spin_lock_irq(&current->sighand->siglock); recalc_sigpending(); /* see uprobe_deny_signal() */ spin_unlock_irq(&current->sighand->siglock); + + if (unlikely(err)) { + uprobe_warn(current, "execute the probed insn, sending SIGILL."); + force_sig_info(SIGILL, SEND_SIG_FORCED, current); + } } /* -- cgit v1.2.3-70-g09d2 From 75f9ef0b7f1aae33b7be7ba8d9c23c8cb48c2212 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 3 Apr 2014 20:52:19 +0200 Subject: uprobes/x86: Teach arch_uprobe_post_xol() to restart if possible SIGILL after the failed arch_uprobe_post_xol() should only be used as a last resort; we should try to restart the probed insn if possible. Currently only adjust_ret_addr() can fail, and this can only happen if another thread unmapped our stack after we executed "call" out-of-line. Most probably the application is buggy, but even in this case it can have a handler for SIGSEGV/etc. And in theory it can be even correct and do something non-trivial with its memory. Of course we can't restart unconditionally, so arch_uprobe_post_xol() does this only if ->post_xol() returns -ERESTART, even if currently this is the only possible error. default_post_xol_op(UPROBE_FIX_CALL) can always restart, but as Jim pointed out it should not forget to pop off the return address pushed by this insn executed out-of-line. Note: this is not "perfect", we do not want the extra handler_chain() after restart, but I think this is the best solution we can realistically do without too much uglification. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index e72903eacd4..cdd6909affc 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -443,16 +443,22 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs { struct uprobe_task *utask = current->utask; long correction = (long)(utask->vaddr - utask->xol_vaddr); - int ret = 0; handle_riprel_post_xol(auprobe, regs, &correction); if (auprobe->fixups & UPROBE_FIX_IP) regs->ip += correction; - if (auprobe->fixups & UPROBE_FIX_CALL) - ret = adjust_ret_addr(regs->sp, correction); + if (auprobe->fixups & UPROBE_FIX_CALL) { + if (adjust_ret_addr(regs->sp, correction)) { + if (is_ia32_task()) + regs->sp += 4; + else + regs->sp += 8; + return -ERESTART; + } + } - return ret; + return 0; } static struct uprobe_xol_ops default_xol_ops = { @@ -599,6 +605,12 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) int err = auprobe->ops->post_xol(auprobe, regs); if (err) { arch_uprobe_abort_xol(auprobe, regs); + /* + * Restart the probed insn. 
->post_xol() must ensure + * this is really possible if it returns -ERESTART. + */ + if (err == -ERESTART) + return 0; return err; } } -- cgit v1.2.3-70-g09d2 From 8faaed1b9f500d6cf32702716733a645c9b0727a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Apr 2014 17:16:10 +0200 Subject: uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and arch_uretprobe_hijack_return_addr() 1. Add the trivial sizeof_long() helper and change other callers of is_ia32_task() to use it. TODO: is_ia32_task() is not what we actually want, TS_COMPAT does not necessarily mean 32bit. Fortunately syscall-like insns can't be probed so it actually works, but it would be better to rename and use is_ia32_frame(). 2. As Jim pointed out "ncopied" in arch_uretprobe_hijack_return_addr() and adjust_ret_addr() should be named "nleft". And in fact only the last copy_to_user() in arch_uretprobe_hijack_return_addr() actually needs to inspect the non-zero error code. TODO: adjust_ret_addr() should die. We can always calculate the value we need to write into *regs->sp, just UPROBE_FIX_CALL should record insn->length. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index cdd6909affc..aecc2205438 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -408,6 +408,11 @@ struct uprobe_xol_ops { int (*post_xol)(struct arch_uprobe *, struct pt_regs *); }; +static inline int sizeof_long(void) +{ + return is_ia32_task() ? 4 : 8; +} + static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); @@ -419,21 +424,14 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) */ static int adjust_ret_addr(unsigned long sp, long correction) { - int rasize, ncopied; - long ra = 0; - - if (is_ia32_task()) - rasize = 4; - else - rasize = 8; + int rasize = sizeof_long(); + long ra; - ncopied = copy_from_user(&ra, (void __user *)sp, rasize); - if (unlikely(ncopied)) + if (copy_from_user(&ra, (void __user *)sp, rasize)) return -EFAULT; ra += correction; - ncopied = copy_to_user((void __user *)sp, &ra, rasize); - if (unlikely(ncopied)) + if (copy_to_user((void __user *)sp, &ra, rasize)) return -EFAULT; return 0; @@ -450,10 +448,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs if (auprobe->fixups & UPROBE_FIX_CALL) { if (adjust_ret_addr(regs->sp, correction)) { - if (is_ia32_task()) - regs->sp += 4; - else - regs->sp += 8; + regs->sp += sizeof_long(); return -ERESTART; } } @@ -714,23 +709,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) { - int rasize, ncopied; + int rasize = sizeof_long(), nleft; unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ - rasize = is_ia32_task() ? 
4 : 8; - ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize); - if (unlikely(ncopied)) + if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) return -1; /* check whether address has been already hijacked */ if (orig_ret_vaddr == trampoline_vaddr) return orig_ret_vaddr; - ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); - if (likely(!ncopied)) + nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize); + if (likely(!nleft)) return orig_ret_vaddr; - if (ncopied != rasize) { + if (nleft != rasize) { pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); -- cgit v1.2.3-70-g09d2 From 7ba6db2d688bdf83049a18c8e55b2d1e58e8b0bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Apr 2014 20:05:02 +0200 Subject: uprobes/x86: Emulate unconditional relative jmp's Currently we always execute all insns out-of-line, including relative jmp's and call's. This assumes that even if regs->ip points to nowhere after the single-step, default_post_xol_op(UPROBE_FIX_IP) logic will update it correctly. However, this doesn't work if this regs->ip == xol_vaddr + insn_offset is not canonical. In this case CPU generates #GP and general_protection() kills the task which tries to execute this insn out-of-line. Now that we have uprobe_xol_ops we can teach uprobes to emulate these insns and solve the problem. This patch adds branch_xol_ops which has a single branch_emulate_op() hook, so far it can only handle rel8/32 relative jmp's. TODO: move ->fixup into the union along with rip_rela_target_address. Signed-off-by: Oleg Nesterov Reported-by: Jonathan Lebon Reviewed-by: Jim Keniston --- arch/x86/include/asm/uprobes.h | 8 +++++++- arch/x86/kernel/uprobes.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 9f8210bcbb4..e9fd4d5537e 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -44,9 +44,15 @@ struct arch_uprobe { u16 fixups; const struct uprobe_xol_ops *ops; + union { #ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; + unsigned long rip_rela_target_address; #endif + struct { + s32 offs; + u8 ilen; + } branch; + }; }; struct arch_uprobe_task { diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index aecc2205438..c3baeaacf1b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -461,6 +461,40 @@ static struct uprobe_xol_ops default_xol_ops = { .post_xol = default_post_xol_op, }; +static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + regs->ip += auprobe->branch.ilen + auprobe->branch.offs; + return true; +} + +static struct uprobe_xol_ops branch_xol_ops = { + .emulate = branch_emulate_op, +}; + +/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ +static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) +{ + + switch (OPCODE1(insn)) { + case 0xeb: /* jmp 8 */ + case 0xe9: /* jmp 32 */ + break; + default: + return -ENOSYS; + } + + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; + + auprobe->branch.ilen = insn->length; + auprobe->branch.offs = insn->immediate.value; + + auprobe->ops = &branch_xol_ops; + return 0; +} + /** * arch_uprobe_analyze_insn - instruction analysis including 
validity and fixups. * @mm: the probed address space. @@ -478,6 +512,10 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, if (ret) return ret; + ret = branch_setup_xol_ops(auprobe, &insn); + if (ret != -ENOSYS) + return ret; + /* * Figure out which fixups arch_uprobe_post_xol() will need to perform, * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups -- cgit v1.2.3-70-g09d2 From d241006354c550c7d22f304e2fdf90137fb8eaab Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Apr 2014 21:06:10 +0200 Subject: uprobes/x86: Emulate nop's using ops->emulate() Finally we can kill the ugly (and very limited) code in __skip_sstep(). Just change branch_setup_xol_ops() to treat "nop" as jmp to the next insn. Thanks to lib/insn.c, it is clever enough. OPCODE1() == 0x90 includes "(rep;)+ nop;" at least, and (afaics) much more. Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index c3baeaacf1b..f3c4212f381 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -478,6 +478,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) switch (OPCODE1(insn)) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ + case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ break; default: return -ENOSYS; @@ -710,29 +711,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_TF; } -/* - * Skip these instructions as per the currently known x86 ISA. - * rep=0x66*; nop=0x90 - */ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - int i; - if (auprobe->ops->emulate) return auprobe->ops->emulate(auprobe, regs); - - /* TODO: move this code into ->emulate() hook */ for (i = 0; i < MAX_UINSN_BYTES; i++) { if (auprobe->insn[i] == 0x66) continue; - - if (auprobe->insn[i] == 0x90) { - regs->ip += i + 1; - return true; - } - - break; - } return false; } -- cgit v1.2.3-70-g09d2 From 8e89c0be171b1a9ed2ba67168733ca811bb45d5c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Apr 2014 18:11:02 +0200 Subject: uprobes/x86: Emulate relative call's See the previous "Emulate unconditional relative jmp's" which explains why we can not execute "jmp" out-of-line; the same applies to "call". Emulating a rip-relative call is trivial; we only need to additionally push the ret-address. If this fails, we execute this instruction out of line and this should trigger the trap; the probed application should die or the same insn will be restarted if a signal handler expands the stack. We do not even need ->post_xol() for this case. But there is a corner (and almost theoretical) case: another thread can expand the stack right before we execute this insn out of line. In this case it hits the same problem we are trying to solve. So we simply turn the probed insn into "call 1f; 1:" and add ->post_xol() which restores ->sp and restarts. Many thanks to Jonathan who finally found the standalone reproducer, otherwise I would never resolve the "random SIGSEGV's under systemtap" bug-report. 
Now that the problem is clear we can write the simplified test-case: void probe_func(void), callee(void); int failed = 1; asm ( ".text\n" ".align 4096\n" ".globl probe_func\n" "probe_func:\n" "call callee\n" "ret" ); /* * This assumes that: * * - &probe_func = 0x401000 + a_bit, aligned = 0x402000 * * - xol_vma->vm_start = TASK_SIZE_MAX - PAGE_SIZE = 0x7fffffffe000 * as xol_add_vma() asks; the 1st slot = 0x7fffffffe080 * * so we can target the non-canonical address from xol_vma using * the simple math below, 100 * 4096 is just the random offset */ asm (".org . + 0x800000000000 - 0x7fffffffe080 - 5 - 1 + 100 * 4096\n"); void callee(void) { failed = 0; } int main(void) { probe_func(); return failed; } It SIGSEGV's if you probe "probe_func" (although this is not very reliable, randomize_va_space/etc can change the placement of xol area). Note: as Denys Vlasenko pointed out, amd and intel treat "callw" (0x66 0xe8) differently. This patch relies on lib/insn.c and thus implements the intel's behaviour: 0x66 is simply ignored. Fortunately nothing sane should ever use this insn, so we postpone the fix until we decide what should we do; emulate or not, support or not, etc. Reported-by: Jonathan Lebon Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/include/asm/uprobes.h | 1 + arch/x86/kernel/uprobes.c | 80 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index e9fd4d5537e..93bee7b9385 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -51,6 +51,7 @@ struct arch_uprobe { struct { s32 offs; u8 ilen; + u8 opc1; } branch; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index f3c4212f381..0914435001f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -461,34 +461,97 @@ static struct uprobe_xol_ops default_xol_ops = { .post_xol = default_post_xol_op, }; +static bool branch_is_call(struct arch_uprobe *auprobe) +{ + return auprobe->branch.opc1 == 0xe8; +} + static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - regs->ip += auprobe->branch.ilen + auprobe->branch.offs; + unsigned long new_ip = regs->ip += auprobe->branch.ilen; + + if (branch_is_call(auprobe)) { + unsigned long new_sp = regs->sp - sizeof_long(); + /* + * If it fails we execute this (mangled, see the comment in + * branch_clear_offset) insn out-of-line. In the likely case + * this should trigger the trap, and the probed application + * should die or restart the same insn after it handles the + * signal, arch_uprobe_post_xol() won't be even called. + * + * But there is corner case, see the comment in ->post_xol(). + */ + if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + return false; + regs->sp = new_sp; + } + + regs->ip = new_ip + auprobe->branch.offs; return true; } +static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + BUG_ON(!branch_is_call(auprobe)); + /* + * We can only get here if branch_emulate_op() failed to push the ret + * address _and_ another thread expanded our stack before the (mangled) + * "call" insn was executed out-of-line. Just restore ->sp and restart. + * We could also restore ->ip and try to call branch_emulate_op() again. 
+ */ + regs->sp += sizeof_long(); + return -ERESTART; +} + +static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn) +{ + /* + * Turn this insn into "call 1f; 1:", this is what we will execute + * out-of-line if ->emulate() fails. We only need this to generate + * a trap, so that the probed task receives the correct signal with + * the properly filled siginfo. + * + * But see the comment in ->post_xol(), in the unlikely case it can + * succeed. So we need to ensure that the new ->ip can not fall into + * the non-canonical area and trigger #GP. + * + * We could turn it into (say) "pushf", but then we would need to + * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte + * of ->insn[] for set_orig_insn(). + */ + memset(auprobe->insn + insn_offset_immediate(insn), + 0, insn->immediate.nbytes); +} + static struct uprobe_xol_ops branch_xol_ops = { .emulate = branch_emulate_op, + .post_xol = branch_post_xol_op, }; /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { + u8 opc1 = OPCODE1(insn); + + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - switch (OPCODE1(insn)) { + switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ break; + + case 0xe8: /* call relative */ + branch_clear_offset(auprobe, insn); + break; default: return -ENOSYS; } - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; - + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; @@ -532,9 +595,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, case 0xca: fix_ip = false; break; - case 0xe8: /* call relative - Fix return addr */ - fix_call = true; - break; case 0x9a: /* call absolute - Fix return addr, not ip */ fix_call = true; fix_ip = false; -- cgit v1.2.3-70-g09d2 From 8f95505bc18a026ef7d3dfdbce4e5b31b3e4fc1b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 6 Apr 2014 21:53:47 +0200 Subject: uprobes/x86: Emulate relative conditional "short" jmp's Teach branch_emulate_op() to emulate the conditional "short" jmp's which check regs->flags. Note: this doesn't support jcxz/jecxz, loope/loopz, and loopne/loopnz. They are all rel8 and thus they can't trigger the problem, but perhaps we will add support in the future just for completeness. 
Reported-by: Jonathan Lebon Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 57 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0914435001f..0460d04f0ac 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -466,9 +466,58 @@ static bool branch_is_call(struct arch_uprobe *auprobe) return auprobe->branch.opc1 == 0xe8; } +#define CASE_COND \ + COND(70, 71, XF(OF)) \ + COND(72, 73, XF(CF)) \ + COND(74, 75, XF(ZF)) \ + COND(78, 79, XF(SF)) \ + COND(7a, 7b, XF(PF)) \ + COND(76, 77, XF(CF) || XF(ZF)) \ + COND(7c, 7d, XF(SF) != XF(OF)) \ + COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF)) + +#define COND(op_y, op_n, expr) \ + case 0x ## op_y: DO((expr) != 0) \ + case 0x ## op_n: DO((expr) == 0) + +#define XF(xf) (!!(flags & X86_EFLAGS_ ## xf)) + +static bool is_cond_jmp_opcode(u8 opcode) +{ + switch (opcode) { + #define DO(expr) \ + return true; + CASE_COND + #undef DO + + default: + return false; + } +} + +static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + unsigned long flags = regs->flags; + + switch (auprobe->branch.opc1) { + #define DO(expr) \ + return expr; + CASE_COND + #undef DO + + default: /* not a conditional jmp */ + return true; + } +} + +#undef XF +#undef COND +#undef CASE_COND + static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { unsigned long new_ip = regs->ip += auprobe->branch.ilen; + unsigned long offs = (long)auprobe->branch.offs; if (branch_is_call(auprobe)) { unsigned long new_sp = regs->sp - sizeof_long(); @@ -484,9 +533,11 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) return false; regs->sp = new_sp; + } else if (!check_jmp_cond(auprobe, regs)) { + offs = 0; } - regs->ip = new_ip + auprobe->branch.offs; + regs->ip = new_ip + offs; return true; } @@ -547,8 +598,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) case 0xe8: /* call relative */ branch_clear_offset(auprobe, insn); break; + default: - return -ENOSYS; + if (!is_cond_jmp_opcode(opc1)) + return -ENOSYS; } auprobe->branch.opc1 = opc1; -- cgit v1.2.3-70-g09d2 From 6cc5e7ff2c38641060f20786a5caf2815edbca5f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 16:22:58 +0200 Subject: uprobes/x86: Emulate relative conditional "near" jmp's Change branch_setup_xol_ops() to simply use opc1 = OPCODE2(insn) - 0x10 if OPCODE1() == 0x0f; this matches the "short" jmp which checks the same condition. Thanks to lib/insn.c, it does the rest correctly. branch->ilen/offs are correct no matter if this jmp is "near" or "short". Reported-by: Jonathan Lebon Signed-off-by: Oleg Nesterov Reviewed-by: Jim Keniston --- arch/x86/kernel/uprobes.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0460d04f0ac..ace22916ade 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -599,6 +599,14 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) branch_clear_offset(auprobe, insn); break; + case 0x0f: + if (insn->opcode.nbytes != 2) + return -ENOSYS; + /* + * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches + * OPCODE1() of the "short" jmp which checks the same condition. 
*/ + opc1 = OPCODE2(insn) - 0x10; default: if (!is_cond_jmp_opcode(opc1)) return -ENOSYS; -- cgit v1.2.3-70-g09d2 From 4a3dc121d3c370625575247bf714db3f601d83e9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 18 Mar 2014 16:56:43 +0800 Subject: perf/x86: Export perf_assign_events() Export perf_assign_events() to allow building the perf Intel uncore driver as a module. Signed-off-by: Yan, Zheng Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1395133004-23205-3-git-send-email-zheng.z.yan@intel.com Cc: Arnaldo Carvalho de Melo Cc: eranian@google.com Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ae407f7226c..89f3b7c1af2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -721,6 +721,7 @@ int perf_assign_events(struct perf_event **events, int n, return sched.state.unassigned; } +EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { -- cgit v1.2.3-70-g09d2 From 722e76e60f2775c21b087ff12c5e678cf0ebcaaf Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 15 May 2014 17:56:44 +0200 Subject: fix Haswell precise store data source encoding This patch fixes a bug in precise_store_data_hsw() whereby it would set the data source memory level to the wrong value. As per the SDM Vol 3b Table 18-41 (Layout of Data Linear Address Information in PEBS Record), when status bit 0 is set this is an L1 hit, otherwise this is an L1 miss. This patch encodes the memory level according to the specification. In V2, we added filtering on the store events. Only the following events produce L1 information: * MEM_UOPS_RETIRED.STLB_MISS_STORES * MEM_UOPS_RETIRED.LOCK_STORES * MEM_UOPS_RETIRED.SPLIT_STORES * MEM_UOPS_RETIRED.ALL_STORES Cc: mingo@elte.hu Cc: acme@ghostprotocols.net Cc: jolsa@redhat.com Cc: jmario@redhat.com Cc: ak@linux.intel.com Tested-and-Reviewed-by: Don Zickus Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140515155644.GA3884@quad Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index ae96cfa5edd..980970cb744 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -108,15 +108,31 @@ static u64 precise_store_data(u64 status) return val; } -static u64 precise_store_data_hsw(u64 status) +static u64 precise_store_data_hsw(struct perf_event *event, u64 status) { union perf_mem_data_src dse; + u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; dse.val = 0; dse.mem_op = PERF_MEM_OP_STORE; dse.mem_lvl = PERF_MEM_LVL_NA; + + /* + * L1 info only valid for following events: + * + * MEM_UOPS_RETIRED.STLB_MISS_STORES + * MEM_UOPS_RETIRED.LOCK_STORES + * MEM_UOPS_RETIRED.SPLIT_STORES + * MEM_UOPS_RETIRED.ALL_STORES + */ + if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) + return dse.mem_lvl; + if (status & 1) - dse.mem_lvl = PERF_MEM_LVL_L1; + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + else + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + /* Nothing else supported. Sorry. 
*/ return dse.val; } @@ -887,7 +903,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, data.data_src.val = load_latency_data(pebs->dse); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) data.data_src.val = - precise_store_data_hsw(pebs->dse); + precise_store_data_hsw(event, pebs->dse); else data.data_src.val = precise_store_data(pebs->dse); } -- cgit v1.2.3-70-g09d2