diff options
author | Jan Beulich <JBeulich@novell.com> | 2009-12-18 16:16:03 +0000 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-30 11:57:32 +0100 |
commit | 7269e8812a59f74fb1ce134465d0bcf5683b93a1 (patch) | |
tree | cfa0022a98907232edc8ffbe145926a272da14f1 /arch/x86/lib/memset_64.S | |
parent | 1b1d9258181bae199dc940f4bd0298126b9a73d9 (diff) |
x86-64: Modify memcpy()/memset() alternatives mechanism
In order to avoid unnecessary chains of branches, rather than
implementing memcpy()/memset()'s access to their alternative
implementations via a jump, patch the (larger) original function
directly.
The memcpy() part of this is slightly subtle: while alternative
instruction patching does itself use memcpy(), with the
replacement block being less than 64-bytes in size the main loop
of the original function doesn't get used for copying memcpy_c()
over memcpy(), and hence we can safely write over its beginning.
Also note that the CFI annotations are fine for both variants of
each of the functions.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib/memset_64.S')
-rw-r--r-- | arch/x86/lib/memset_64.S | 18 |
1 files changed, 6 insertions, 12 deletions
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2c5948116bd..e88d3b81644 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -12,9 +12,8 @@ * * rax original destination */ - ALIGN -memset_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d @@ -29,8 +28,8 @@ memset_c: rep stosb movq %r9,%rax ret - CFI_ENDPROC -ENDPROC(memset_c) +.Lmemset_e: + .previous ENTRY(memset) ENTRY(__memset) @@ -118,16 +117,11 @@ ENDPROC(__memset) #include <asm/cpufeature.h> - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (memset_c - memset) - (2f - 1b) /* offset */ -2: - .previous .section .altinstructions,"a" .align 8 .quad memset - .quad 1b + .quad .Lmemset_c .byte X86_FEATURE_REP_GOOD .byte .Lfinal - memset - .byte 2b - 1b + .byte .Lmemset_e - .Lmemset_c .previous |