From e00b12e64be9a34ef071de7b6052ca9ea29dd460 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 24 Oct 2013 12:52:06 +0200
Subject: perf/x86: Further optimize copy_from_user_nmi()

Now that we can deal with nested NMIs due to IRET re-enabling NMIs, and
can deal with faults from NMI by making sure we preserve CR2 over NMIs,
we can in fact simply access user-space memory from NMI context.

So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and
rework the fault path to do the minimal required work before taking the
in_atomic() fault handler.

In particular, avoid perf_sw_event(), which would make perf recurse on
itself (it should be harmless, as our recursion protections should be
able to deal with this -- but why tempt fate).

Also rename notify_page_fault() to kprobes_fault(), as that is a much
better name: there is no notifier in it and it is specific to kprobes.

Don measured that his worst-case NMI path shrunk from ~300K cycles to
~150K cycles.

Cc: Stephane Eranian
Cc: jmario@redhat.com
Cc: Arnaldo Carvalho de Melo
Cc: Linus Torvalds
Cc: Andi Kleen
Cc: dave.hansen@linux.intel.com
Tested-by: Don Zickus
Signed-off-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/20131024105206.GM2490@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/usercopy.c | 43 +++++++++++++++----------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

(limited to 'arch/x86/lib/usercopy.c')

diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d9..5465b861394 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
 #include <linux/sched.h>
 
 /*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
  */
 unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
+	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return len;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
+		return 0;
+
+	/*
+	 * Even though this function is typically called from NMI/IRQ context
+	 * disable pagefaults so that its behaviour is consistent even when
+	 * called from other contexts.
+	 */
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(to, from, n);
+	pagefault_enable();
+
+	return n - ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
--
cgit v1.2.3-70-g09d2
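At this point in the series copy_from_user_nmi() still returns the number
of bytes copied, so a caller treats anything short of a full-length result
as a failed copy. A minimal kernel-style caller sketch (read_user_frame()
is a hypothetical helper, not part of the patch; struct stack_frame is
assumed to come from <asm/stacktrace.h>):

    /* Hypothetical caller: only a full-length return means success. */
    static int read_user_frame(struct stack_frame *frame,
                               struct stack_frame __user *fp)
    {
            unsigned long bytes;

            bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
            if (bytes != sizeof(*frame))    /* short copy: fault or bad range */
                    return -EFAULT;

            return 0;
    }

This full-length check is exactly the caller pattern the next commit has
to flip at every call site when the return convention changes.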
From 0a196848ca365ec582c6d86659be456be6d4ed96 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 30 Oct 2013 21:16:22 +0100
Subject: perf: Fix arch_perf_out_copy_user default

The arch_perf_out_copy_user() default, __copy_from_user_inatomic(),
returns bytes not copied, while all other functions given to
DEFINE_OUTPUT_COPY() return bytes copied.

Since copy_from_user_nmi() is the odd duck out by returning bytes copied
where all other *copy_{to,from}* functions return bytes not copied,
change it over and amend DEFINE_OUTPUT_COPY() to expect bytes not
copied.

Oddly enough, DEFINE_OUTPUT_COPY() already returned bytes not copied
while expecting its worker functions to return bytes copied.

Signed-off-by: Peter Zijlstra
Acked-by: will.deacon@arm.com
Cc: Frederic Weisbecker
Link: http://lkml.kernel.org/r/20131030201622.GR16117@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c           |  4 ++--
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |  2 +-
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |  2 +-
 arch/x86/lib/usercopy.c                    |  2 +-
 arch/x86/oprofile/backtrace.c              |  4 ++--
 kernel/events/internal.h                   | 35 ++++++++++++++++++++++---------
 6 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'arch/x86/lib/usercopy.c')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8a87a322412..8e132931614 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1989,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
@@ -2041,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c1760ff3c75..ae96cfa5edd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -789,7 +789,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		size = ip - to; /* Must fit our buffer, see above */
 
 		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-		if (bytes != size)
+		if (bytes != 0)
 			return 0;
 
 		kaddr = buf;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 90ee6c1d054..d82d155aca8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -491,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 
 		/* may fail if text not present */
 		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
-		if (bytes != size)
+		if (bytes != 0)
 			return X86_BR_NONE;
 
 		addr = buf;
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 5465b861394..ddf9ecb53cc 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -31,6 +31,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	ret = __copy_from_user_inatomic(to, from, n);
 	pagefault_enable();
 
-	return n - ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index d6aa6e8315d..5d04be5efb6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head)
 	unsigned long bytes;
 
 	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
-	if (bytes != sizeof(bufhead))
+	if (bytes != 0)
 		return NULL;
 
 	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
 	unsigned long bytes;
 
 	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
-	if (bytes != sizeof(bufhead))
+	if (bytes != 0)
 		return NULL;
 
 	oprofile_add_trace(bufhead[0].return_address);
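The kernel/events/internal.h change below reworks DEFINE_OUTPUT_COPY() so
that the loop advance is derived from workers which return bytes not
copied. The accounting in isolation, as a standalone userspace sketch
(copy_capped() and all names here are hypothetical, not part of the patch):

    #include <stdio.h>
    #include <string.h>

    /*
     * Model worker: copies at most lim bytes and returns bytes NOT copied,
     * mimicking __copy_from_user_inatomic() faulting partway through.
     */
    static unsigned long copy_capped(void *dst, const void *src,
                                     unsigned long n, unsigned long lim)
    {
            unsigned long done = n < lim ? n : lim;

            memcpy(dst, src, done);
            return n - done;                /* 0 on complete success */
    }

    int main(void)
    {
            char src[16] = "fifteen bytes!!";
            char dst[16] = { 0 };
            unsigned long size = sizeof(src);
            unsigned long written;

            written = copy_capped(dst, src, size, 10); /* short copy */
            written = size - written;       /* bytes advanced, as the
                                               reworked macro computes */
            printf("advanced %lu of %lu bytes\n", written, size);
            return 0;
    }

A short copy leaves written < size, which is what lets the copy loop stop
early so that the "return len" visible in the hunk below reports how many
bytes were left undone.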
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be..569b218782a 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 }
 
 #define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
-static inline unsigned int						\
+static inline unsigned long						\
 func_name(struct perf_output_handle *handle,				\
-	  const void *buf, unsigned int len)				\
+	  const void *buf, unsigned long len)				\
 {									\
 	unsigned long size, written;					\
 									\
 	do {								\
-		size = min_t(unsigned long, handle->size, len);		\
-									\
+		size = min(handle->size, len);				\
 		written = memcpy_func(handle->addr, buf, size);		\
+		written = size - written;				\
 									\
 		len -= written;						\
 		handle->addr += written;				\
@@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle,			\
 	return len;							\
 }
 
-static inline int memcpy_common(void *dst, const void *src, size_t n)
+static inline unsigned long
+memcpy_common(void *dst, const void *src, unsigned long n)
 {
 	memcpy(dst, src, n);
-	return n;
+	return 0;
 }
 
 DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
 
-#define MEMCPY_SKIP(dst, src, n) (n)
+static inline unsigned long
+memcpy_skip(void *dst, const void *src, unsigned long n)
+{
+	return 0;
+}
 
-DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
+DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
 
 #ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user __copy_from_user_inatomic
+#define arch_perf_out_copy_user arch_perf_out_copy_user
+
+static inline unsigned long
+arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
+{
+	unsigned long ret;
+
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(dst, src, n);
+	pagefault_enable();
+
+	return ret;
+}
 #endif
 
 DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
--
cgit v1.2.3-70-g09d2
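With both commits applied, copy_from_user_nmi() and every
DEFINE_OUTPUT_COPY() worker agree that the return value is bytes not
copied. Under that convention the caller sketch shown after the first
commit collapses to a test against zero (again a hypothetical helper,
not part of the patches):

    /* Hypothetical caller after this series: zero means success. */
    static int read_user_frame(struct stack_frame *frame,
                               struct stack_frame __user *fp)
    {
            if (copy_from_user_nmi(frame, fp, sizeof(*frame)) != 0)
                    return -EFAULT; /* some bytes were left uncopied */

            return 0;
    }

This is the same "if (bytes != 0)" shape the second commit applies to the
perf callchain walkers, the PEBS fixup, the LBR decoder and the oprofile
backtracers.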