From 1b1d9258181bae199dc940f4bd0298126b9a73d9 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 18 Dec 2009 16:12:56 +0000
Subject: x86-64: Modify copy_user_generic() alternatives mechanism

In order to avoid unnecessary chains of branches, rather than
implementing copy_user_generic() as a function consisting of just a
single (possibly patched) branch, instead properly deal with patching
call instructions in the alternative instructions framework, and move
the patching into the callers.

As a follow-on, one could also introduce something like
__EXPORT_SYMBOL_ALT() to avoid patching call sites in modules.

Signed-off-by: Jan Beulich
Cc: Nick Piggin
Cc: Linus Torvalds
Cc: Andrew Morton
LKML-Reference: <4B2BB8180200007800026AE7@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/copy_user_64.S | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/x86/lib')

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index cf889d4e076..71100c98e33 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -90,12 +90,6 @@ ENTRY(_copy_from_user)
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
-ENTRY(copy_user_generic)
-	CFI_STARTPROC
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
-	CFI_ENDPROC
-ENDPROC(copy_user_generic)
-
 	.section .fixup,"ax"
 	/* must zero dest */
 ENTRY(bad_from_user)
--
cgit v1.2.3-70-g09d2
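
The shape of this change is easier to see in plain C than in the patching
machinery itself: before, every caller branched to copy_user_generic(),
which was nothing but one more (possibly patched) branch to the real
implementation; afterwards, the call instruction in each call site is
patched to reach the chosen variant directly, and the intermediate hop
disappears. A minimal userspace sketch of the idea, where every name
(copy_unrolled, copy_string_ops, cpu_has_rep_good) is an illustrative
stand-in rather than kernel code, and a function pointer rebound once
models the boot-time call patching:

#include <stdio.h>
#include <string.h>

/* Stand-ins for copy_user_generic_unrolled()/copy_user_generic_string() */
static void *copy_unrolled(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	while (n--)			/* byte loop stands in for unrolled MOVs */
		*d++ = *s++;
	return dst;
}

static void *copy_string_ops(void *dst, const void *src, size_t n)
{
	return memcpy(dst, src, n);	/* stands in for REP MOVSQ */
}

/* The pointer every call site uses; "patching the call" at boot is
 * modeled here as rebinding it once. */
static void *(*copy_user_generic)(void *, const void *, size_t) = copy_unrolled;

/* Before the patch, every call went through an extra stub like this:
 * one call into the stub, then another branch to the real code. */
static void *copy_user_generic_old(void *dst, const void *src, size_t n)
{
	return copy_user_generic(dst, src, n);
}

int main(void)
{
	char src[16] = "hello, world!", dst[16];
	int cpu_has_rep_good = 1;	/* stands in for X86_FEATURE_REP_GOOD */

	copy_user_generic_old(dst, src, sizeof(src));	/* old: two branches */

	if (cpu_has_rep_good)				/* one-time "patch" */
		copy_user_generic = copy_string_ops;

	copy_user_generic(dst, src, sizeof(src));	/* new: one branch */
	puts(dst);
	return 0;
}

The saving is exactly the removed stub: after the one-time rebind, each
call site takes one branch instead of two.
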
From 7269e8812a59f74fb1ce134465d0bcf5683b93a1 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 18 Dec 2009 16:16:03 +0000
Subject: x86-64: Modify memcpy()/memset() alternatives mechanism

In order to avoid unnecessary chains of branches, rather than
implementing memcpy()/memset()'s access to their alternative
implementations via a jump, patch the (larger) original function
directly.

The memcpy() part of this is slightly subtle: while alternative
instruction patching does itself use memcpy(), with the replacement
block being less than 64 bytes in size the main loop of the original
function doesn't get used for copying memcpy_c() over memcpy(), and
hence we can safely write over its beginning.

Also note that the CFI annotations are fine for both variants of each
of the functions.

Signed-off-by: Jan Beulich
Cc: Nick Piggin
Cc: Linus Torvalds
Cc: Andrew Morton
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/memcpy_64.S | 23 ++++++++---------------
 arch/x86/lib/memset_64.S | 18 ++++++------------
 2 files changed, 14 insertions(+), 27 deletions(-)

(limited to 'arch/x86/lib')

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b5..f82e884928a 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
 /*
  * memcpy_c() - fast string ops (REP MOVSQ) based variant.
  *
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	ALIGN
-memcpy_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
 	movq %rdi, %rax
 
 	movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
 	movl %edx, %ecx
 	rep movsb
 	ret
-	CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+	.previous
 
 ENTRY(__memcpy)
 ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
 	 * It is also a lot simpler.
	 * Use this when possible:
	 */
-	.section .altinstr_replacement, "ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
-2:
-	.previous
-
 	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
-	.quad 1b
+	.quad .Lmemcpy_c
 	.byte X86_FEATURE_REP_GOOD
 
 	/*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte 2b - 1b
-	.byte 2b - 1b
+	.byte .Lmemcpy_e - .Lmemcpy_c
+	.byte .Lmemcpy_e - .Lmemcpy_c
 	.previous
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd..e88d3b81644 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
  *
  * rax   original destination
  */
-	ALIGN
-memset_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
 	rep stosb
 	movq %r9,%rax
 	ret
-	CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+	.previous
 
 ENTRY(memset)
 ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memset_c - memset) - (2f - 1b)	/* offset */
-2:
-	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memset
-	.quad 1b
+	.quad .Lmemset_c
 	.byte X86_FEATURE_REP_GOOD
 	.byte .Lfinal - memset
-	.byte 2b - 1b
+	.byte .Lmemset_e - .Lmemset_c
 	.previous
--
cgit v1.2.3-70-g09d2
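
Both files now emit one hand-rolled record into the .altinstructions
table instead of a patchable jump. At boot, apply_alternatives() walks
that table and, when the running CPU advertises the named feature bit,
copies replacementlen bytes from the replacement over the start of the
original function. As a layout sketch (field names follow the kernel's
struct alt_instr of that era; treat this as an illustration, not a
verbatim copy of the header), the record built by the .quad/.byte
directives corresponds to:

#include <stdint.h>

struct alt_instr_sketch {
	uint8_t *instr;		/* .quad memcpy: the code to be patched   */
	uint8_t *replacement;	/* .quad .Lmemcpy_c: the bytes to copy in */
	uint8_t  cpuid;		/* .byte X86_FEATURE_REP_GOOD             */
	uint8_t  instrlen;	/* .byte .Lmemcpy_e - .Lmemcpy_c          */
	uint8_t  replacementlen;/* .byte .Lmemcpy_e - .Lmemcpy_c          */
};				/* .align 8 starts each record aligned    */

Both length bytes are deliberately the replacement's size rather than
the whole original function's: as the comment retained in memcpy_64.S
notes, padding the rest of memcpy() with NOPs would have it overwrite
itself while it is executing, so only the patched-over prefix is
described to the patching code.
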
From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Tue, 12 Jan 2010 18:16:42 -0800
Subject: x86-64: support native xadd rwsem implementation

This one is much faster than the spinlock-based fallback rwsem code,
with certain artificial benchmarks having shown 300%+ improvement on
threaded page faults etc.

Again, note the 32767-thread limit here. So this really does need that
whole "make rwsem_count_t be 64-bit and fix the BIAS values to match"
extension on top of it, but that is conceptually a totally independent
issue.

NOT TESTED! The original patch that this all was based on was tested by
KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the
cleaned-up series, so caveat emptor..

Also note that it _may_ be a good idea to mark some more registers
clobbered on x86-64 in the inline asms instead of saving/restoring
them. They are inline functions, but they are only used in places where
there are not a lot of live registers _anyway_, so doing for example
the clobbers of %r8-%r11 in the asm wouldn't make the fast-path code
any worse, and would make the slow-path code smaller.

(Not that the slow-path really matters to that degree. Saving a few
unnecessary registers is the _least_ of our problems when we hit the
slow path. The instruction/cycle counting really only matters in the
fast path).

Signed-off-by: Linus Torvalds
LKML-Reference:
Signed-off-by: H. Peter Anvin
---
 arch/x86/Kconfig.cpu    |  2 +-
 arch/x86/lib/Makefile   |  1 +
 arch/x86/lib/rwsem_64.S | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/lib/rwsem_64.S

(limited to 'arch/x86/lib')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 08e442bc3ab..9d38a13b4ce 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754f303..c80245131fd 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -39,4 +39,5 @@ else
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
+        lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
 endif
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
new file mode 100644
index 00000000000..15acecf0d7a
--- /dev/null
+++ b/arch/x86/lib/rwsem_64.S
@@ -0,0 +1,81 @@
+/*
+ * x86-64 rwsem wrappers
+ *
+ * This interfaces the inline asm code to the slow-path
+ * C routines. We need to save the call-clobbered regs
+ * that the asm does not mark as clobbered, and move the
+ * argument from %rax to %rdi.
+ *
+ * NOTE! We don't need to save %rax, because the functions
+ * will always return the semaphore pointer in %rax (which
+ * is also the input argument to these helpers)
+ *
+ * The following can clobber %rdx because the asm clobbers it:
+ *	call_rwsem_down_write_failed
+ *	call_rwsem_wake
+ * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
+ */
+
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+#define save_common_regs \
+	pushq %rdi; \
+	pushq %rsi; \
+	pushq %rcx; \
+	pushq %r8; \
+	pushq %r9; \
+	pushq %r10; \
+	pushq %r11
+
+#define restore_common_regs \
+	popq %r11; \
+	popq %r10; \
+	popq %r9; \
+	popq %r8; \
+	popq %rcx; \
+	popq %rsi; \
+	popq %rdi
+
+/* Fix up special calling conventions */
+ENTRY(call_rwsem_down_read_failed)
+	save_common_regs
+	pushq %rdx
+	movq %rax,%rdi
+	call rwsem_down_read_failed
+	popq %rdx
+	restore_common_regs
+	ret
+	ENDPROC(call_rwsem_down_read_failed)
+
+ENTRY(call_rwsem_down_write_failed)
+	save_common_regs
+	movq %rax,%rdi
+	call rwsem_down_write_failed
+	restore_common_regs
+	ret
+	ENDPROC(call_rwsem_down_write_failed)
+
+ENTRY(call_rwsem_wake)
+	decw %dx    /* do nothing if still outstanding active readers */
+	jnz 1f
+	save_common_regs
+	movq %rax,%rdi
+	call rwsem_wake
+	restore_common_regs
+1:	ret
+	ENDPROC(call_rwsem_wake)
+
+/* Fix up special calling conventions */
+ENTRY(call_rwsem_downgrade_wake)
+	save_common_regs
+	pushq %rdx
+	movq %rax,%rdi
+	call rwsem_downgrade_wake
+	popq %rdx
+	restore_common_regs
+	ret
+	ENDPROC(call_rwsem_downgrade_wake)
--
cgit v1.2.3-70-g09d2
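
For context, these wrappers only run when the inline fast path hits
contention; the common case is a single locked instruction on the
semaphore's count word. Below is a rough userspace model of the reader
fast path, reconstructed from the 2.6.33-era semantics: a reader adds a
bias with a locked operation and enters the slow path only if the
result shows an active or queued writer. All names are illustrative,
and the kernel does this in inline asm rather than with compiler
builtins:

#include <stdio.h>

#define RWSEM_ACTIVE_READ_BIAS	1L

struct rwsem_sketch { long count; };	/* 0 = unlocked, <0 = writer/waiters */

/* Slow path; in the kernel, the asm wrappers above save the
 * call-clobbered registers and move the semaphore pointer from %rax to
 * %rdi before calling the real C routine. */
static void rwsem_down_read_failed(struct rwsem_sketch *sem)
{
	(void)sem;			/* would queue the reader and sleep */
}

static void down_read_sketch(struct rwsem_sketch *sem)
{
	/* models LOCK XADD/INC: atomically add the reader bias */
	long old = __atomic_fetch_add(&sem->count, RWSEM_ACTIVE_READ_BIAS,
				      __ATOMIC_ACQUIRE);

	if (old + RWSEM_ACTIVE_READ_BIAS < 0)	/* writer active or queued */
		rwsem_down_read_failed(sem);
}

int main(void)
{
	struct rwsem_sketch sem = { 0 };

	down_read_sketch(&sem);
	printf("count after down_read: %ld\n", sem.count);
	return 0;
}
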
From a7b480e7f30b3813353ec009f10f2ac7a6669f3b Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 22 Jan 2010 16:01:03 +0100
Subject: x86, lib: Add wbinvd smp helpers

Add wbinvd_on_cpu() and wbinvd_on_all_cpus() helpers for executing
wbinvd on a particular CPU or on all CPUs.

[ hpa: renamed lib/smp.c to lib/cache-smp.c ]
[ hpa: wbinvd_on_all_cpus() returns int, but wbinvd() returns void.
  Thus, the former cannot be a macro for the latter, replace with an
  inline function. ]

Signed-off-by: Borislav Petkov
LKML-Reference: <1264172467-25155-2-git-send-email-bp@amd64.org>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/smp.h |  9 +++++++++
 arch/x86/lib/Makefile      |  2 +-
 arch/x86/lib/cache-smp.c   | 19 +++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/lib/cache-smp.c

(limited to 'arch/x86/lib')

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1e796782cd7..4cfc9082406 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -135,6 +135,8 @@ int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
+void wbinvd_on_cpu(int cpu);
+int wbinvd_on_all_cpus(void);
 
 void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
@@ -147,6 +149,13 @@ static inline int num_booting_cpus(void)
 {
 	return cpumask_weight(cpu_callout_mask);
 }
+#else /* !CONFIG_SMP */
+#define wbinvd_on_cpu(cpu)	wbinvd()
+static inline int wbinvd_on_all_cpus(void)
+{
+	wbinvd();
+	return 0;
+}
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754f303..d85e0e438b5 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c
 
 clean-files := inat-tables.c
 
-obj-$(CONFIG_SMP) += msr-smp.o
+obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
new file mode 100644
index 00000000000..a3c66887503
--- /dev/null
+++ b/arch/x86/lib/cache-smp.c
@@ -0,0 +1,19 @@
+#include <asm/smp.h>
+#include <linux/module.h>
+
+static void __wbinvd(void *dummy)
+{
+	wbinvd();
+}
+
+void wbinvd_on_cpu(int cpu)
+{
+	smp_call_function_single(cpu, __wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_cpu);
+
+int wbinvd_on_all_cpus(void)
+{
+	return on_each_cpu(__wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_all_cpus);
--
cgit v1.2.3-70-g09d2
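
A sketch of the intended call pattern; the two helper signatures come
from the patch itself, while the surrounding function and its error
handling are hypothetical:

#include <asm/smp.h>

/* Hypothetical caller: flush every CPU's caches before changing memory
 * attributes, the kind of use these helpers exist for. */
static int example_flush_all(void)
{
	int err;

	err = wbinvd_on_all_cpus();	/* WBINVD on each online CPU */
	if (err)
		return err;		/* on_each_cpu() result */

	wbinvd_on_cpu(0);		/* or flush a single CPU's caches */
	return 0;
}
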
From 6175ddf06b6172046a329e3abfd9c901a43efd2e Mon Sep 17 00:00:00 2001
From: Brian Gerst
Date: Fri, 5 Feb 2010 09:37:07 -0500
Subject: x86: Clean up mem*io functions.

Iomem has no special significance on x86. Use the standard mem*
functions instead of trying to call other versions. Some fixups
are needed to match the function prototypes.

Signed-off-by: Brian Gerst
LKML-Reference: <1265380629-3212-6-git-send-email-brgerst@gmail.com>
Signed-off-by: H. Peter Anvin
---
 arch/x86/boot/compressed/misc.c | 13 ++++---------
 arch/x86/include/asm/io_32.h    | 10 +++++-----
 arch/x86/include/asm/io_64.h    | 22 +++++++++++++---------
 arch/x86/lib/Makefile           |  2 +-
 arch/x86/lib/io_64.c            | 25 -------------------------
 5 files changed, 23 insertions(+), 49 deletions(-)
 delete mode 100644 arch/x86/lib/io_64.c

(limited to 'arch/x86/lib')

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 3b22fe8ab91..88042e812d3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -19,11 +19,6 @@
 #define _ASM_X86_DESC_H 1
 #endif
 
-#ifdef CONFIG_X86_64
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-#endif
-
 #include
 #include
 #include
@@ -131,8 +126,8 @@ static void error(char *m);
 static struct boot_params *real_mode;	/* Pointer to real-mode data */
 static int quiet;
 
-static void *memset(void *s, int c, unsigned n);
-void *memcpy(void *dest, const void *src, unsigned n);
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, const void *src, size_t n);
 
 static void __putstr(int, const char *);
 #define putstr(__x)  __putstr(0, __x)
@@ -223,7 +218,7 @@ static void __putstr(int error, const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-static void *memset(void *s, int c, unsigned n)
+void *memset(void *s, int c, size_t n)
 {
 	int i;
 	char *ss = s;
@@ -233,7 +228,7 @@ static void *memset(void *s, int c, unsigned n)
 	return s;
 }
 
-void *memcpy(void *dest, const void *src, unsigned n)
+void *memcpy(void *dest, const void *src, size_t n)
 {
 	int i;
 	const char *s = src;
diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h
index 72a6a4a930a..685e3329346 100644
--- a/arch/x86/include/asm/io_32.h
+++ b/arch/x86/include/asm/io_32.h
@@ -49,21 +49,21 @@
 #define xlate_dev_kmem_ptr(p)	p
 
 static inline void
-memset_io(volatile void __iomem *addr, unsigned char val, int count)
+memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
 {
 	memset((void __force *)addr, val, count);
 }
 
 static inline void
-memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
 {
-	__memcpy(dst, (const void __force *)src, count);
+	memcpy(dst, (const void __force *)src, count);
 }
 
 static inline void
-memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
 {
-	__memcpy((void __force *)dst, src, count);
+	memcpy((void __force *)dst, src, count);
 }
 
 /*
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
index 4a94aef5acf..1305525813f 100644
--- a/arch/x86/include/asm/io_64.h
+++ b/arch/x86/include/asm/io_64.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_IO_64_H
 #define _ASM_X86_IO_64_H
 
+#include
+#include
 /*
  * This file contains the definitions for the x86 IO instructions
@@ -46,20 +48,22 @@
  */
 #define xlate_dev_kmem_ptr(p)	p
 
-void memset_io(volatile void __iomem *a, int b, size_t c);
+static inline void
+memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
+{
+	memset((void __force *)addr, val, count);
+}
 
-void __memcpy_fromio(void *, unsigned long, unsigned);
-static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
-				 unsigned len)
+static inline void
+memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
 {
-	__memcpy_fromio(to, (unsigned long)from, len);
+	memcpy(dst, (const void __force *)src, count);
 }
 
-void __memcpy_toio(unsigned long, const void *, unsigned);
-static inline void memcpy_toio(volatile void __iomem *to, const void *from,
-			       unsigned len)
+static inline void
+memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
 {
-	__memcpy_toio((unsigned long)to, from, len);
+	memcpy((void __force *)dst, src, count);
 }
 
 /*
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754f303..fff14272dba 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -34,7 +34,7 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
 endif
         lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
-        obj-y += io_64.o iomap_copy_64.o
+        obj-y += iomap_copy_64.o
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
        lib-y += memmove_64.o memset_64.o
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
deleted file mode 100644
index 3f1eb59b5f0..00000000000
--- a/arch/x86/lib/io_64.c
+++ /dev/null
@@ -1,25 +0,0 @@
-#include
-#include
-#include
-
-void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
-{
-	__inline_memcpy((void *)dst, src, len);
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
-{
-	__inline_memcpy(dst, (const void *)src, len);
-}
-EXPORT_SYMBOL(__memcpy_fromio);
-
-void memset_io(volatile void __iomem *a, int b, size_t c)
-{
-	/*
-	 * TODO: memset can mangle the IO patterns quite a bit.
-	 * perhaps it would be better to use a dumb one:
-	 */
-	memset((void *)a, b, c);
-}
-EXPORT_SYMBOL(memset_io);
--
cgit v1.2.3-70-g09d2
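
Callers are unaffected: memcpy_fromio()/memcpy_toio()/memset_io() keep
their prototypes and now compile down to the standard mem* functions on
both 32-bit and 64-bit. A hypothetical driver fragment showing the
usual usage (the mapping size and the 0x100 descriptor offset are
invented for illustration):

#include <linux/errno.h>
#include <linux/io.h>

/* Hypothetical: copy a device descriptor out of a memory-mapped BAR. */
static int read_device_descriptor(resource_size_t bar, void *buf, size_t len)
{
	void __iomem *regs = ioremap(bar, 0x1000);

	if (!regs)
		return -ENOMEM;
	memcpy_fromio(buf, regs + 0x100, len);	/* plain memcpy() underneath */
	iounmap(regs);
	return 0;
}
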