summaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/memcpy_64.S
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-04-13 09:50:21 +0200
committerIngo Molnar <mingo@kernel.org>2012-04-13 09:50:21 +0200
commit659c36fcda403013a01b85da07cf2d9711e6d6c7 (patch)
treeece2e7d0e2c19ea5a3d0ec172ad0b81a8a19021d /arch/x86/lib/memcpy_64.S
parent9521d830b6341d1887dcfc2aebde23fbfa5f1473 (diff)
parent5a7ed29c7572d00a75e8c4529e30c5ac2ef82271 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Fixes and improvements for perf/core: . Overhaul the tools/ makefiles, gluing them to the top level Makefile, from Borislav Petkov. . Move the UI files from tools/perf/util/ui/ to tools/perf/ui/. Also move the GTK+ browser to tools/perf/ui/gtk/, from Namhyung Kim. . Only fallback to sw cycles counter on ENOENT for the hw cycles, from Robert Richter . Trivial fixes from Robert Richter . Handle the autogenerated bison/flex files better, from Namhyung and Jiri Olsa. . Navigate jump instructions in the annotate browser, just press enter or ->, still needs support for a jump navigation history, i.e. to go back. . Search string in the annotate browser: same keys as vim: / forward n next backward/forward ? backward . Clarify number of events/samples in the report header, from Ashay Rane Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib/memcpy_64.S')
-rw-r--r--arch/x86/lib/memcpy_64.S44
1 files changed, 20 insertions, 24 deletions
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0ecde..1c273be7c97 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
-
- movl %edx, %ecx
- shrl $3, %ecx
+ movq %rdx, %rcx
+ shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
-
- movl %edx, %ecx
+ movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax
- /*
- * Use 32bit CMP here to avoid long NOP padding.
- */
- cmp $0x20, %edx
+ cmpq $0x20, %rdx
jb .Lhandle_tail
/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
- subl $0x20, %edx
+ subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
@@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
- addq $0x20, %rdx
+ addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
- addq $0x20, %rdx
+ addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
- cmpq $16, %rdx
+ cmpl $16, %edx
jb .Lless_16bytes
/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
- cmpq $8, %rdx
+ cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
- cmpq $4, %rdx
+ cmpl $4, %edx
jb .Lless_3bytes
/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
- cmpl $0, %edx
- je .Lend
+ subl $1, %edx
+ jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
-.Lloop_1:
- movb (%rsi), %r8b
- movb %r8b, (%rdi)
- incq %rdi
- incq %rsi
- decl %edx
- jnz .Lloop_1
+ movzbl (%rsi), %ecx
+ jz .Lstore_1byte
+ movzbq 1(%rsi), %r8
+ movzbq (%rsi, %rdx), %r9
+ movb %r8b, 1(%rdi)
+ movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+ movb %cl, (%rdi)
.Lend:
retq