summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/uprobes.h16
-rw-r--r--arch/x86/kernel/uprobes.c551
-rw-r--r--kernel/events/uprobes.c31
-rw-r--r--tools/perf/Documentation/perf-diff.txt21
-rw-r--r--tools/perf/Documentation/perf-report.txt24
-rw-r--r--tools/perf/Documentation/perf-top.txt18
-rw-r--r--tools/perf/builtin-diff.c32
-rw-r--r--tools/perf/builtin-kmem.c8
-rw-r--r--tools/perf/builtin-lock.c10
-rw-r--r--tools/perf/builtin-mem.c15
-rw-r--r--tools/perf/builtin-report.c22
-rw-r--r--tools/perf/builtin-sched.c10
-rw-r--r--tools/perf/builtin-top.c8
-rw-r--r--tools/perf/perf-completion.sh4
-rw-r--r--tools/perf/ui/browsers/hists.c39
-rw-r--r--tools/perf/ui/gtk/hists.c11
-rw-r--r--tools/perf/ui/hist.c8
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/hist.c59
-rw-r--r--tools/perf/util/hist.h10
-rw-r--r--tools/perf/util/symbol.h3
21 files changed, 602 insertions, 302 deletions
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 3087ea9c5f2..93bee7b9385 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -33,15 +33,27 @@ typedef u8 uprobe_opcode_t;
#define UPROBE_SWBP_INSN 0xcc
#define UPROBE_SWBP_INSN_SIZE 1
+struct uprobe_xol_ops;
+
struct arch_uprobe {
- u16 fixups;
union {
u8 insn[MAX_UINSN_BYTES];
u8 ixol[MAX_UINSN_BYTES];
};
+
+ u16 fixups;
+ const struct uprobe_xol_ops *ops;
+
+ union {
#ifdef CONFIG_X86_64
- unsigned long rip_rela_target_address;
+ unsigned long rip_rela_target_address;
#endif
+ struct {
+ s32 offs;
+ u8 ilen;
+ u8 opc1;
+ } branch;
+ };
};
struct arch_uprobe_task {
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 2ed845928b5..ace22916ade 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -53,7 +53,7 @@
#define OPCODE1(insn) ((insn)->opcode.bytes[0])
#define OPCODE2(insn) ((insn)->opcode.bytes[1])
#define OPCODE3(insn) ((insn)->opcode.bytes[2])
-#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
+#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value)
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -229,63 +229,6 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
return -ENOTSUPP;
}
-/*
- * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
- * annotate arch_uprobe->fixups accordingly. To start with,
- * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
- */
-static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
-{
- bool fix_ip = true, fix_call = false; /* defaults */
- int reg;
-
- insn_get_opcode(insn); /* should be a nop */
-
- switch (OPCODE1(insn)) {
- case 0x9d:
- /* popf */
- auprobe->fixups |= UPROBE_FIX_SETF;
- break;
- case 0xc3: /* ret/lret */
- case 0xcb:
- case 0xc2:
- case 0xca:
- /* ip is correct */
- fix_ip = false;
- break;
- case 0xe8: /* call relative - Fix return addr */
- fix_call = true;
- break;
- case 0x9a: /* call absolute - Fix return addr, not ip */
- fix_call = true;
- fix_ip = false;
- break;
- case 0xff:
- insn_get_modrm(insn);
- reg = MODRM_REG(insn);
- if (reg == 2 || reg == 3) {
- /* call or lcall, indirect */
- /* Fix return addr; ip is correct. */
- fix_call = true;
- fix_ip = false;
- } else if (reg == 4 || reg == 5) {
- /* jmp or ljmp, indirect */
- /* ip is correct. */
- fix_ip = false;
- }
- break;
- case 0xea: /* jmp absolute -- ip is correct */
- fix_ip = false;
- break;
- default:
- break;
- }
- if (fix_ip)
- auprobe->fixups |= UPROBE_FIX_IP;
- if (fix_call)
- auprobe->fixups |= UPROBE_FIX_CALL;
-}
-
#ifdef CONFIG_X86_64
/*
* If arch_uprobe->insn doesn't use rip-relative addressing, return
@@ -310,15 +253,11 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
* - The displacement is always 4 bytes.
*/
static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
{
u8 *cursor;
u8 reg;
- if (mm->context.ia32_compat)
- return;
-
- auprobe->rip_rela_target_address = 0x0;
if (!insn_rip_relative(insn))
return;
@@ -372,7 +311,48 @@ handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct ins
cursor++;
memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
}
- return;
+}
+
+/*
+ * If we're emulating a rip-relative instruction, save the contents
+ * of the scratch register and store the target address in that register.
+ */
+static void
+pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+ struct arch_uprobe_task *autask)
+{
+ if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
+ autask->saved_scratch_register = regs->ax;
+ regs->ax = current->utask->vaddr;
+ regs->ax += auprobe->rip_rela_target_address;
+ } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
+ autask->saved_scratch_register = regs->cx;
+ regs->cx = current->utask->vaddr;
+ regs->cx += auprobe->rip_rela_target_address;
+ }
+}
+
+static void
+handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+{
+ if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
+ struct arch_uprobe_task *autask;
+
+ autask = &current->utask->autask;
+ if (auprobe->fixups & UPROBE_FIX_RIP_AX)
+ regs->ax = autask->saved_scratch_register;
+ else
+ regs->cx = autask->saved_scratch_register;
+
+ /*
+ * The original instruction includes a displacement, and so
+ * is 4 bytes longer than what we've just single-stepped.
+ * Caller may need to apply other fixups to handle stuff
+ * like "jmpq *...(%rip)" and "callq *...(%rip)".
+ */
+ if (correction)
+ *correction += 4;
+ }
}
static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
@@ -401,9 +381,19 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
return validate_insn_64bits(auprobe, insn);
}
#else /* 32-bit: */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+/*
+ * No RIP-relative addressing on 32-bit
+ */
+static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+{
+}
+static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
+ struct arch_uprobe_task *autask)
+{
+}
+static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
+ long *correction)
{
- /* No RIP-relative addressing on 32-bit */
}
static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
@@ -412,141 +402,311 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
}
#endif /* CONFIG_X86_64 */
-/**
- * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
- * @mm: the probed address space.
- * @arch_uprobe: the probepoint information.
- * @addr: virtual address at which to install the probepoint
- * Return 0 on success or a -ve number on error.
+struct uprobe_xol_ops {
+ bool (*emulate)(struct arch_uprobe *, struct pt_regs *);
+ int (*pre_xol)(struct arch_uprobe *, struct pt_regs *);
+ int (*post_xol)(struct arch_uprobe *, struct pt_regs *);
+};
+
+static inline int sizeof_long(void)
+{
+ return is_ia32_task() ? 4 : 8;
+}
+
+static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+ return 0;
+}
+
+/*
+ * Adjust the return address pushed by a call insn executed out of line.
*/
-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+static int adjust_ret_addr(unsigned long sp, long correction)
{
- int ret;
- struct insn insn;
+ int rasize = sizeof_long();
+ long ra;
- auprobe->fixups = 0;
- ret = validate_insn_bits(auprobe, mm, &insn);
- if (ret != 0)
- return ret;
+ if (copy_from_user(&ra, (void __user *)sp, rasize))
+ return -EFAULT;
- handle_riprel_insn(auprobe, mm, &insn);
- prepare_fixups(auprobe, &insn);
+ ra += correction;
+ if (copy_to_user((void __user *)sp, &ra, rasize))
+ return -EFAULT;
return 0;
}
-#ifdef CONFIG_X86_64
-/*
- * If we're emulating a rip-relative instruction, save the contents
- * of the scratch register and store the target address in that register.
- */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
- struct arch_uprobe_task *autask)
+static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
- autask->saved_scratch_register = regs->ax;
- regs->ax = current->utask->vaddr;
- regs->ax += auprobe->rip_rela_target_address;
- } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
- autask->saved_scratch_register = regs->cx;
- regs->cx = current->utask->vaddr;
- regs->cx += auprobe->rip_rela_target_address;
+ struct uprobe_task *utask = current->utask;
+ long correction = (long)(utask->vaddr - utask->xol_vaddr);
+
+ handle_riprel_post_xol(auprobe, regs, &correction);
+ if (auprobe->fixups & UPROBE_FIX_IP)
+ regs->ip += correction;
+
+ if (auprobe->fixups & UPROBE_FIX_CALL) {
+ if (adjust_ret_addr(regs->sp, correction)) {
+ regs->sp += sizeof_long();
+ return -ERESTART;
+ }
}
+
+ return 0;
}
-#else
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
- struct arch_uprobe_task *autask)
+
+static struct uprobe_xol_ops default_xol_ops = {
+ .pre_xol = default_pre_xol_op,
+ .post_xol = default_post_xol_op,
+};
+
+static bool branch_is_call(struct arch_uprobe *auprobe)
{
- /* No RIP-relative addressing on 32-bit */
+ return auprobe->branch.opc1 == 0xe8;
}
-#endif
-/*
- * arch_uprobe_pre_xol - prepare to execute out of line.
- * @auprobe: the probepoint information.
- * @regs: reflects the saved user state of current task.
- */
-int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
-{
- struct arch_uprobe_task *autask;
+#define CASE_COND \
+ COND(70, 71, XF(OF)) \
+ COND(72, 73, XF(CF)) \
+ COND(74, 75, XF(ZF)) \
+ COND(78, 79, XF(SF)) \
+ COND(7a, 7b, XF(PF)) \
+ COND(76, 77, XF(CF) || XF(ZF)) \
+ COND(7c, 7d, XF(SF) != XF(OF)) \
+ COND(7e, 7f, XF(ZF) || XF(SF) != XF(OF))
- autask = &current->utask->autask;
- autask->saved_trap_nr = current->thread.trap_nr;
- current->thread.trap_nr = UPROBE_TRAP_NR;
- regs->ip = current->utask->xol_vaddr;
- pre_xol_rip_insn(auprobe, regs, autask);
+#define COND(op_y, op_n, expr) \
+ case 0x ## op_y: DO((expr) != 0) \
+ case 0x ## op_n: DO((expr) == 0)
- autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
- regs->flags |= X86_EFLAGS_TF;
- if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
- set_task_blockstep(current, false);
+#define XF(xf) (!!(flags & X86_EFLAGS_ ## xf))
- return 0;
+static bool is_cond_jmp_opcode(u8 opcode)
+{
+ switch (opcode) {
+ #define DO(expr) \
+ return true;
+ CASE_COND
+ #undef DO
+
+ default:
+ return false;
+ }
}
-/*
- * This function is called by arch_uprobe_post_xol() to adjust the return
- * address pushed by a call instruction executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static bool check_jmp_cond(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- int rasize, ncopied;
- long ra = 0;
+ unsigned long flags = regs->flags;
- if (is_ia32_task())
- rasize = 4;
- else
- rasize = 8;
+ switch (auprobe->branch.opc1) {
+ #define DO(expr) \
+ return expr;
+ CASE_COND
+ #undef DO
- ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
- if (unlikely(ncopied))
- return -EFAULT;
+ default: /* not a conditional jmp */
+ return true;
+ }
+}
- ra += correction;
- ncopied = copy_to_user((void __user *)sp, &ra, rasize);
- if (unlikely(ncopied))
- return -EFAULT;
+#undef XF
+#undef COND
+#undef CASE_COND
- return 0;
+static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ unsigned long new_ip = regs->ip += auprobe->branch.ilen;
+ unsigned long offs = (long)auprobe->branch.offs;
+
+ if (branch_is_call(auprobe)) {
+ unsigned long new_sp = regs->sp - sizeof_long();
+ /*
+ * If it fails we execute this (mangled, see the comment in
+ * branch_clear_offset) insn out-of-line. In the likely case
+ * this should trigger the trap, and the probed application
+ * should die or restart the same insn after it handles the
+ * signal, arch_uprobe_post_xol() won't be even called.
+ *
+ * But there is corner case, see the comment in ->post_xol().
+ */
+ if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long()))
+ return false;
+ regs->sp = new_sp;
+ } else if (!check_jmp_cond(auprobe, regs)) {
+ offs = 0;
+ }
+
+ regs->ip = new_ip + offs;
+ return true;
}
-#ifdef CONFIG_X86_64
-static bool is_riprel_insn(struct arch_uprobe *auprobe)
+static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0);
+ BUG_ON(!branch_is_call(auprobe));
+ /*
+ * We can only get here if branch_emulate_op() failed to push the ret
+ * address _and_ another thread expanded our stack before the (mangled)
+ * "call" insn was executed out-of-line. Just restore ->sp and restart.
+ * We could also restore ->ip and try to call branch_emulate_op() again.
+ */
+ regs->sp += sizeof_long();
+ return -ERESTART;
}
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
{
- if (is_riprel_insn(auprobe)) {
- struct arch_uprobe_task *autask;
+ /*
+ * Turn this insn into "call 1f; 1:", this is what we will execute
+ * out-of-line if ->emulate() fails. We only need this to generate
+ * a trap, so that the probed task receives the correct signal with
+ * the properly filled siginfo.
+ *
+ * But see the comment in ->post_xol(), in the unlikely case it can
+ * succeed. So we need to ensure that the new ->ip can not fall into
+ * the non-canonical area and trigger #GP.
+ *
+ * We could turn it into (say) "pushf", but then we would need to
+ * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte
+ * of ->insn[] for set_orig_insn().
+ */
+ memset(auprobe->insn + insn_offset_immediate(insn),
+ 0, insn->immediate.nbytes);
+}
- autask = &current->utask->autask;
- if (auprobe->fixups & UPROBE_FIX_RIP_AX)
- regs->ax = autask->saved_scratch_register;
- else
- regs->cx = autask->saved_scratch_register;
+static struct uprobe_xol_ops branch_xol_ops = {
+ .emulate = branch_emulate_op,
+ .post_xol = branch_post_xol_op,
+};
+
+/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */
+static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
+{
+ u8 opc1 = OPCODE1(insn);
+
+ /* has the side-effect of processing the entire instruction */
+ insn_get_length(insn);
+ if (WARN_ON_ONCE(!insn_complete(insn)))
+ return -ENOEXEC;
+
+ switch (opc1) {
+ case 0xeb: /* jmp 8 */
+ case 0xe9: /* jmp 32 */
+ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
+ break;
+
+ case 0xe8: /* call relative */
+ branch_clear_offset(auprobe, insn);
+ break;
+ case 0x0f:
+ if (insn->opcode.nbytes != 2)
+ return -ENOSYS;
/*
- * The original instruction includes a displacement, and so
- * is 4 bytes longer than what we've just single-stepped.
- * Fall through to handle stuff like "jmpq *...(%rip)" and
- * "callq *...(%rip)".
+ * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches
+ * OPCODE1() of the "short" jmp which checks the same condition.
*/
- if (correction)
- *correction += 4;
+ opc1 = OPCODE2(insn) - 0x10;
+ default:
+ if (!is_cond_jmp_opcode(opc1))
+ return -ENOSYS;
}
+
+ auprobe->branch.opc1 = opc1;
+ auprobe->branch.ilen = insn->length;
+ auprobe->branch.offs = insn->immediate.value;
+
+ auprobe->ops = &branch_xol_ops;
+ return 0;
}
-#else
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+
+/**
+ * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @mm: the probed address space.
+ * @arch_uprobe: the probepoint information.
+ * @addr: virtual address at which to install the probepoint
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+{
+ struct insn insn;
+ bool fix_ip = true, fix_call = false;
+ int ret;
+
+ ret = validate_insn_bits(auprobe, mm, &insn);
+ if (ret)
+ return ret;
+
+ ret = branch_setup_xol_ops(auprobe, &insn);
+ if (ret != -ENOSYS)
+ return ret;
+
+ /*
+ * Figure out which fixups arch_uprobe_post_xol() will need to perform,
+ * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups
+ * is either zero or it reflects rip-related fixups.
+ */
+ switch (OPCODE1(&insn)) {
+ case 0x9d: /* popf */
+ auprobe->fixups |= UPROBE_FIX_SETF;
+ break;
+ case 0xc3: /* ret or lret -- ip is correct */
+ case 0xcb:
+ case 0xc2:
+ case 0xca:
+ fix_ip = false;
+ break;
+ case 0x9a: /* call absolute - Fix return addr, not ip */
+ fix_call = true;
+ fix_ip = false;
+ break;
+ case 0xea: /* jmp absolute -- ip is correct */
+ fix_ip = false;
+ break;
+ case 0xff:
+ insn_get_modrm(&insn);
+ switch (MODRM_REG(&insn)) {
+ case 2: case 3: /* call or lcall, indirect */
+ fix_call = true;
+ case 4: case 5: /* jmp or ljmp, indirect */
+ fix_ip = false;
+ }
+ /* fall through */
+ default:
+ handle_riprel_insn(auprobe, &insn);
+ }
+
+ if (fix_ip)
+ auprobe->fixups |= UPROBE_FIX_IP;
+ if (fix_call)
+ auprobe->fixups |= UPROBE_FIX_CALL;
+
+ auprobe->ops = &default_xol_ops;
+ return 0;
+}
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information.
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- /* No RIP-relative addressing on 32-bit */
+ struct uprobe_task *utask = current->utask;
+
+ regs->ip = utask->xol_vaddr;
+ utask->autask.saved_trap_nr = current->thread.trap_nr;
+ current->thread.trap_nr = UPROBE_TRAP_NR;
+
+ utask->autask.saved_tf = !!(regs->flags & X86_EFLAGS_TF);
+ regs->flags |= X86_EFLAGS_TF;
+ if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
+ set_task_blockstep(current, false);
+
+ if (auprobe->ops->pre_xol)
+ return auprobe->ops->pre_xol(auprobe, regs);
+ return 0;
}
-#endif
/*
* If xol insn itself traps and generates a signal(Say,
@@ -592,22 +752,25 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
*/
int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- struct uprobe_task *utask;
- long correction;
- int result = 0;
+ struct uprobe_task *utask = current->utask;
WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
- utask = current->utask;
- current->thread.trap_nr = utask->autask.saved_trap_nr;
- correction = (long)(utask->vaddr - utask->xol_vaddr);
- handle_riprel_post_xol(auprobe, regs, &correction);
- if (auprobe->fixups & UPROBE_FIX_IP)
- regs->ip += correction;
-
- if (auprobe->fixups & UPROBE_FIX_CALL)
- result = adjust_ret_addr(regs->sp, correction);
+ if (auprobe->ops->post_xol) {
+ int err = auprobe->ops->post_xol(auprobe, regs);
+ if (err) {
+ arch_uprobe_abort_xol(auprobe, regs);
+ /*
+ * Restart the probed insn. ->post_xol() must ensure
+ * this is really possible if it returns -ERESTART.
+ */
+ if (err == -ERESTART)
+ return 0;
+ return err;
+ }
+ }
+ current->thread.trap_nr = utask->autask.saved_trap_nr;
/*
* arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
* so we can get an extra SIGTRAP if we do not clear TF. We need
@@ -618,7 +781,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
else if (!(auprobe->fixups & UPROBE_FIX_SETF))
regs->flags &= ~X86_EFLAGS_TF;
- return result;
+ return 0;
}
/* callback routine for handling exceptions. */
@@ -652,8 +815,9 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
/*
* This function gets called when XOL instruction either gets trapped or
- * the thread has a fatal signal, so reset the instruction pointer to its
- * probed address.
+ * the thread has a fatal signal, or if arch_uprobe_post_xol() failed.
+ * Reset the instruction pointer to its probed address for the potential
+ * restart or for post mortem analysis.
*/
void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
@@ -668,25 +832,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
regs->flags &= ~X86_EFLAGS_TF;
}
-/*
- * Skip these instructions as per the currently known x86 ISA.
- * rep=0x66*; nop=0x90
- */
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- int i;
-
- for (i = 0; i < MAX_UINSN_BYTES; i++) {
- if (auprobe->insn[i] == 0x66)
- continue;
-
- if (auprobe->insn[i] == 0x90) {
- regs->ip += i + 1;
- return true;
- }
-
- break;
- }
+ if (auprobe->ops->emulate)
+ return auprobe->ops->emulate(auprobe, regs);
return false;
}
@@ -701,23 +850,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
- int rasize, ncopied;
+ int rasize = sizeof_long(), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
- rasize = is_ia32_task() ? 4 : 8;
- ncopied = copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize);
- if (unlikely(ncopied))
+ if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
return -1;
/* check whether address has been already hijacked */
if (orig_ret_vaddr == trampoline_vaddr)
return orig_ret_vaddr;
- ncopied = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
- if (likely(!ncopied))
+ nleft = copy_to_user((void __user *)regs->sp, &trampoline_vaddr, rasize);
+ if (likely(!nleft))
return orig_ret_vaddr;
- if (ncopied != rasize) {
+ if (nleft != rasize) {
pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369..d1edc5e6fd0 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
-/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP 1
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
- /* For now assume that the instruction need not be single-stepped */
- __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
/* add to uprobes_tree, sorted on inode:offset */
cur_uprobe = insert_uprobe(uprobe);
-
/* a uprobe exists for this inode:offset combination */
if (cur_uprobe) {
kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
return true;
}
-/*
- * Avoid singlestepping the original instruction if the original instruction
- * is a NOP or can be emulated.
- */
-static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
-{
- if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
- if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
- return true;
- clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
- }
- return false;
-}
-
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
- if (can_skip_sstep(uprobe, regs))
+ if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
goto out;
if (!pre_ssout(uprobe, regs, bp_vaddr))
return;
- /* can_skip_sstep() succeeded, or restart if can't singlestep */
+ /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
out:
put_uprobe(uprobe);
}
@@ -1886,10 +1867,11 @@ out:
static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
{
struct uprobe *uprobe;
+ int err = 0;
uprobe = utask->active_uprobe;
if (utask->state == UTASK_SSTEP_ACK)
- arch_uprobe_post_xol(&uprobe->arch, regs);
+ err = arch_uprobe_post_xol(&uprobe->arch, regs);
else if (utask->state == UTASK_SSTEP_TRAPPED)
arch_uprobe_abort_xol(&uprobe->arch, regs);
else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* see uprobe_deny_signal() */
spin_unlock_irq(&current->sighand->siglock);
+
+ if (unlikely(err)) {
+ uprobe_warn(current, "execute the probed insn, sending SIGILL.");
+ force_sig_info(SIGILL, SEND_SIG_FORCED, current);
+ }
}
/*
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index fdfceee0ffd..fbfa1192923 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -33,17 +33,20 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
@@ -89,6 +92,14 @@ OPTIONS
--order::
Specify compute sorting column number.
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
@@ -157,6 +168,10 @@ with:
- period_percent being the % of the hist entry period value within
single data file
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
+
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8eab8a4bdeb..09af6629856 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
--d::
---dsos=::
- Only consider symbols in these dsos. CSV that understands
- file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
@@ -237,6 +240,15 @@ OPTIONS
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
--header::
Show header information in the perf.data file. This includes
various information like hostname, OS and perf version, cpu/mem
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 976b00c6cdb..64ed79c4363 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -123,13 +123,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
- Only consider symbols in these dsos.
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--comms::
- Only consider symbols in these comms.
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--symbols::
- Only consider these symbols.
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
@@ -165,6 +168,15 @@ Default is to monitor all CPUS.
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 204fffe2253..6ef80f22c1e 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -220,7 +220,8 @@ static int setup_compute(const struct option *opt, const char *str,
static double period_percent(struct hist_entry *he, u64 period)
{
- u64 total = he->hists->stats.total_period;
+ u64 total = hists__total_period(he->hists);
+
return (period * 100.0) / total;
}
@@ -259,11 +260,18 @@ static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
char *buf, size_t size)
{
+ u64 he_total = he->hists->stats.total_period;
+ u64 pair_total = pair->hists->stats.total_period;
+
+ if (symbol_conf.filter_relative) {
+ he_total = he->hists->stats.total_non_filtered_period;
+ pair_total = pair->hists->stats.total_non_filtered_period;
+ }
return scnprintf(buf, size,
"(%" PRIu64 " * 100 / %" PRIu64 ") - "
"(%" PRIu64 " * 100 / %" PRIu64 ")",
- pair->stat.period, pair->hists->stats.total_period,
- he->stat.period, he->hists->stats.total_period);
+ pair->stat.period, pair_total,
+ he->stat.period, he_total);
}
static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
@@ -327,15 +335,16 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- if (al.filtered)
- return 0;
-
if (hists__add_entry(&evsel->hists, &al, sample->period,
sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
+ if (al.filtered == 0) {
+ evsel->hists.stats.total_non_filtered_period += sample->period;
+ evsel->hists.nr_non_filtered_entries++;
+ }
evsel->hists.stats.total_period += sample->period;
return 0;
}
@@ -565,7 +574,9 @@ static void hists__compute_resort(struct hists *hists)
next = rb_first(root);
hists->nr_entries = 0;
+ hists->nr_non_filtered_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_non_filtered_period = 0;
hists__reset_col_len(hists);
while (next != NULL) {
@@ -732,13 +743,16 @@ static const struct option options[] = {
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
static double baseline_percent(struct hist_entry *he)
{
- struct hists *hists = he->hists;
- return 100.0 * he->stat.period / hists->stats.total_period;
+ u64 total = hists__total_period(he->hists);
+
+ return 100.0 * he->stat.period / total;
}
static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
@@ -1120,6 +1134,8 @@ static int data_init(int argc, const char **argv)
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
+ perf_config(perf_default_config, NULL);
+
sort_order = diff__default_sort_order;
argc = parse_options(argc, argv, options, diff_usage, 0);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 929462aa494..bd91de07d2a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -756,11 +756,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};
- const char * const kmem_usage[] = {
- "perf kmem [<options>] {record|stat}",
+ const char *const kmem_subcommands[] = { "record", "stat", NULL };
+ const char *kmem_usage[] = {
+ NULL,
NULL
};
- argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+ argc = parse_options_subcommand(argc, argv, kmem_options,
+ kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index c852c7a85d3..6148afc995c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -961,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
"perf lock info [<options>]",
NULL
};
- const char * const lock_usage[] = {
- "perf lock [<options>] {record|report|script|info}",
+ const char *const lock_subcommands[] = { "record", "report", "script",
+ "info", NULL };
+ const char *lock_usage[] = {
+ NULL,
NULL
};
const char * const report_usage[] = {
@@ -976,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_LIST_HEAD(lockhash_table + i);
- argc = parse_options(argc, argv, lock_options, lock_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
+ lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(lock_usage, lock_options);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 2e3ade69a58..4a1a6c94a5e 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -21,11 +21,6 @@ struct perf_mem {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
-static const char * const mem_usage[] = {
- "perf mem [<options>] {record <command> |report}",
- NULL
-};
-
static int __cmd_record(int argc, const char **argv)
{
int rec_argc, i = 0, j;
@@ -220,9 +215,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
" between columns '.' is reserved."),
OPT_END()
};
+ const char *const mem_subcommands[] = { "record", "report", NULL };
+ const char *mem_usage[] = {
+ NULL,
+ NULL
+ };
+
- argc = parse_options(argc, argv, mem_options, mem_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
+ mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
usage_with_options(mem_usage, mem_options);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c8f21137dfd..af8cb7a2c9b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -123,6 +123,8 @@ static int report__add_mem_hist_entry(struct report *rep, struct addr_location *
evsel->hists.stats.total_period += cost;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ if (!he->filtered)
+ evsel->hists.stats.nr_non_filtered_samples++;
err = hist_entry__append_callchain(he, sample);
out:
return err;
@@ -176,6 +178,8 @@ static int report__add_branch_hist_entry(struct report *rep, struct addr_locatio
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ if (!he->filtered)
+ evsel->hists.stats.nr_non_filtered_samples++;
} else
goto out;
}
@@ -209,6 +213,8 @@ static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel,
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
evsel->hists.stats.total_period += sample->period;
+ if (!he->filtered)
+ evsel->hists.stats.nr_non_filtered_samples++;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
out:
return err;
@@ -337,6 +343,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
char buf[512];
size_t size = sizeof(buf);
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
if (perf_evsel__is_group_event(evsel)) {
struct perf_evsel *pos;
@@ -344,8 +355,13 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
evname = buf;
for_each_group_member(pos, evsel) {
- nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
- nr_events += pos->hists.stats.total_period;
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos->hists.stats.nr_non_filtered_samples;
+ nr_events += pos->hists.stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos->hists.stats.total_period;
+ }
}
}
@@ -823,6 +839,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "how to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
struct perf_data_file file = {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 9ac0a495c95..d3fb0ed7240 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1713,8 +1713,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"perf sched replay [<options>]",
NULL
};
- const char * const sched_usage[] = {
- "perf sched [<options>] {record|latency|map|replay|script}",
+ const char *const sched_subcommands[] = { "record", "latency", "map",
+ "replay", "script", NULL };
+ const char *sched_usage[] = {
+ NULL,
NULL
};
struct trace_sched_handler lat_ops = {
@@ -1736,8 +1738,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
sched.curr_pid[i] = -1;
- argc = parse_options(argc, argv, sched_options, sched_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
+ sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(sched_usage, sched_options);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 65aaa5bbf7e..37d30460bad 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -253,6 +253,9 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
return NULL;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ if (!he->filtered)
+ evsel->hists.stats.nr_non_filtered_samples++;
+
return he;
}
@@ -694,8 +697,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 ||
- al.filtered)
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
return;
if (!top->kptr_restrict_warned &&
@@ -1116,6 +1118,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_CALLBACK(0, "percent-limit", &top, "percent",
"Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
const char * const top_usage[] = {
diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
index ae3a57694b6..33569847fdc 100644
--- a/tools/perf/perf-completion.sh
+++ b/tools/perf/perf-completion.sh
@@ -121,8 +121,8 @@ __perf_main ()
elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
evts=$($cmd list --raw-dump)
__perfcomp_colon "$evts" "$cur"
- # List subcommands for 'perf kvm'
- elif [[ $prev == "kvm" ]]; then
+ # List subcommands for perf commands
+ elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then
subcmds=$($cmd $prev --list-cmds)
__perfcomp_colon "$subcmds" "$cur"
# List long option names
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 7ec871af3f6..4d416984c59 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -769,12 +769,15 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hb->hists->stats.total_period;
+ u64 total = hists__total_period(h->hists);
+ float percent = 0.0;
if (h->filtered)
continue;
+ if (total)
+ percent = h->stat.period * 100.0 / total;
+
if (percent < hb->min_pcnt)
continue;
@@ -792,8 +795,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ u64 total = hists__total_period(hists);
+ float percent = 0.0;
+
+ if (total)
+ percent = h->stat.period * 100.0 / total;
if (percent < min_pcnt)
return NULL;
@@ -813,8 +819,11 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
{
while (nd != NULL) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ u64 total = hists__total_period(hists);
+ float percent = 0.0;
+
+ if (total)
+ percent = h->stat.period * 100.0 / total;
if (!h->filtered && percent >= min_pcnt)
return nd;
@@ -1189,6 +1198,11 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
char buf[512];
size_t buflen = sizeof(buf);
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
if (perf_evsel__is_group_event(evsel)) {
struct perf_evsel *pos;
@@ -1196,8 +1210,13 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
ev_name = buf;
for_each_group_member(pos, evsel) {
- nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
- nr_events += pos->hists.stats.total_period;
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos->hists.stats.nr_non_filtered_samples;
+ nr_events += pos->hists.stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos->hists.stats.total_period;
+ }
}
}
@@ -1370,6 +1389,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
"C Collapse all callchains\n" \
"d Zoom into current DSO\n" \
"E Expand all callchains\n" \
+ "F Toggle percentage of filtered entries\n" \
/* help messages are sorted by lexical order of the hotkey */
const char report_help[] = HIST_BROWSER_HELP_COMMON
@@ -1475,6 +1495,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (env->arch)
tui__header_window(env);
continue;
+ case 'F':
+ symbol_conf.filter_relative ^= 1;
+ continue;
case K_F1:
case 'h':
case '?':
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index e395ef9b0ae..91f10f3f6dd 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -228,12 +228,15 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
GtkTreeIter iter;
- float percent = h->stat.period * 100.0 /
- hists->stats.total_period;
+ u64 total = hists__total_period(h->hists);
+ float percent = 0.0;
if (h->filtered)
continue;
+ if (total)
+ percent = h->stat.period * 100.0 / total;
+
if (percent < min_pcnt)
continue;
@@ -261,12 +264,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
}
if (symbol_conf.use_callchain && sort__has_sym) {
- u64 total;
-
if (callchain_param.mode == CHAIN_GRAPH_REL)
total = h->stat.period;
- else
- total = hists->stats.total_period;
perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
sym_col, total);
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 0f403b83e9d..0912805c08f 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -32,10 +32,10 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
if (fmt_percent) {
double percent = 0.0;
+ u64 total = hists__total_period(hists);
- if (hists->stats.total_period)
- percent = 100.0 * get_field(he) /
- hists->stats.total_period;
+ if (total)
+ percent = 100.0 * get_field(he) / total;
ret += hpp__call_print_fn(hpp, print_fn, fmt, percent);
} else
@@ -50,7 +50,7 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
- u64 total = pair->hists->stats.total_period;
+ u64 total = hists__total_period(pair->hists);
if (!total)
continue;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 3e0fdd369cc..24519e14ac5 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -11,6 +11,7 @@
#include "util.h"
#include "cache.h"
#include "exec_cmd.h"
+#include "util/hist.h" /* perf_hist_config */
#define MAXNAME (256)
@@ -355,6 +356,9 @@ int perf_default_config(const char *var, const char *value,
if (!prefixcmp(var, "core."))
return perf_default_core_config(var, value);
+ if (!prefixcmp(var, "hist."))
+ return perf_hist_config(var, value);
+
/* Add other config variables here. */
return 0;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f38590d7561..5a892477aa5 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -321,9 +321,11 @@ void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
{
if (!h->filtered) {
hists__calc_col_len(hists, h);
- ++hists->nr_entries;
- hists->stats.total_period += h->stat.period;
+ hists->nr_non_filtered_entries++;
+ hists->stats.total_non_filtered_period += h->stat.period;
}
+ hists->nr_entries++;
+ hists->stats.total_period += h->stat.period;
}
static u8 symbol__parent_filter(const struct symbol *parent)
@@ -674,8 +676,9 @@ void hists__output_resort(struct hists *hists)
next = rb_first(root);
hists->entries = RB_ROOT;
- hists->nr_entries = 0;
+ hists->nr_non_filtered_entries = 0;
hists->stats.total_period = 0;
+ hists->stats.total_non_filtered_period = 0;
hists__reset_col_len(hists);
while (next) {
@@ -694,12 +697,12 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h
if (h->filtered)
return;
- ++hists->nr_entries;
+ ++hists->nr_non_filtered_entries;
if (h->ms.unfolded)
- hists->nr_entries += h->nr_rows;
+ hists->nr_non_filtered_entries += h->nr_rows;
h->row_offset = 0;
- hists->stats.total_period += h->stat.period;
- hists->stats.nr_events[PERF_RECORD_SAMPLE] += h->stat.nr_events;
+ hists->stats.total_non_filtered_period += h->stat.period;
+ hists->stats.nr_non_filtered_samples += h->stat.nr_events;
hists__calc_col_len(hists, h);
}
@@ -721,8 +724,9 @@ void hists__filter_by_dso(struct hists *hists)
{
struct rb_node *nd;
- hists->nr_entries = hists->stats.total_period = 0;
- hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
+ hists->nr_non_filtered_entries = 0;
+ hists->stats.total_non_filtered_period = 0;
+ hists->stats.nr_non_filtered_samples = 0;
hists__reset_col_len(hists);
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
@@ -754,8 +758,9 @@ void hists__filter_by_thread(struct hists *hists)
{
struct rb_node *nd;
- hists->nr_entries = hists->stats.total_period = 0;
- hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
+ hists->nr_non_filtered_entries = 0;
+ hists->stats.total_non_filtered_period = 0;
+ hists->stats.nr_non_filtered_samples = 0;
hists__reset_col_len(hists);
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
@@ -785,8 +790,9 @@ void hists__filter_by_symbol(struct hists *hists)
{
struct rb_node *nd;
- hists->nr_entries = hists->stats.total_period = 0;
- hists->stats.nr_events[PERF_RECORD_SAMPLE] = 0;
+ hists->nr_non_filtered_entries = 0;
+ hists->stats.total_non_filtered_period = 0;
+ hists->stats.nr_non_filtered_samples = 0;
hists__reset_col_len(hists);
for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
@@ -931,3 +937,30 @@ int hists__link(struct hists *leader, struct hists *other)
return 0;
}
+
+u64 hists__total_period(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_period :
+ hists->stats.total_period;
+}
+
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!strcmp(arg, "relative"))
+ symbol_conf.filter_relative = true;
+ else if (!strcmp(arg, "absolute"))
+ symbol_conf.filter_relative = false;
+ else
+ return -1;
+
+ return 0;
+}
+
+int perf_hist_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "hist.percentage"))
+ return parse_filter_percentage(NULL, value, 0);
+
+ return 0;
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1f1f513dfe7..5a0343eb22e 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -37,9 +37,11 @@ enum hist_filter {
*/
struct events_stats {
u64 total_period;
+ u64 total_non_filtered_period;
u64 total_lost;
u64 total_invalid_chains;
u32 nr_events[PERF_RECORD_HEADER_MAX];
+ u32 nr_non_filtered_samples;
u32 nr_lost_warned;
u32 nr_unknown_events;
u32 nr_invalid_chains;
@@ -83,6 +85,7 @@ struct hists {
struct rb_root entries;
struct rb_root entries_collapsed;
u64 nr_entries;
+ u64 nr_non_filtered_entries;
const struct thread *thread_filter;
const struct dso *dso_filter;
const char *uid_filter_str;
@@ -112,6 +115,7 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+u64 hists__total_period(struct hists *hists);
void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
void hists__inc_nr_events(struct hists *hists, u32 type);
void events_stats__inc(struct events_stats *stats, u32 type);
@@ -250,4 +254,10 @@ static inline int script_browse(const char *script_opt __maybe_unused)
#endif
unsigned int hists__sort_list_width(struct hists *hists);
+
+struct option;
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused);
+int perf_hist_config(const char *var, const char *value);
+
#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 501e4e722e8..ae94e006a52 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -115,7 +115,8 @@ struct symbol_conf {
annotate_asm_raw,
annotate_src,
event_group,
- demangle;
+ demangle,
+ filter_relative;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,