From 9599ec0471deae24044241e2173090d2cbc0e899 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 17 Jan 2011 17:39:15 -0800 Subject: x86-64, mem: Convert memmove() to assembly file and fix return value bug memmove_64.c only implements memmove() function which is completely written in inline assembly code. Therefore it doesn't make sense to keep the assembly code in .c file. Currently memmove() doesn't store return value to rax. This may cause issue if caller uses the return value. The patch fixes this issue. Signed-off-by: Fenghua Yu LKML-Reference: <1295314755-6625-1-git-send-email-fenghua.yu@intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/lib/memmove_64.S | 197 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/memmove_64.c | 192 -------------------------------------------- 2 files changed, 197 insertions(+), 192 deletions(-) create mode 100644 arch/x86/lib/memmove_64.S delete mode 100644 arch/x86/lib/memmove_64.c (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S new file mode 100644 index 00000000000..0ecb8433e5a --- /dev/null +++ b/arch/x86/lib/memmove_64.S @@ -0,0 +1,197 @@ +/* + * Normally compiler builtins are used, but sometimes the compiler calls out + * of line code. Based on asm-i386/string.h. + * + * This assembly file is re-written from memmove_64.c file. + * - Copyright 2011 Fenghua Yu + */ +#define _STRING_C +#include +#include + +#undef memmove + +/* + * Implement memmove(). This can handle overlap between src and dst. + * + * Input: + * rdi: dest + * rsi: src + * rdx: count + * + * Output: + * rax: dest + */ +ENTRY(memmove) + CFI_STARTPROC + /* Handle more 32bytes in loop */ + mov %rdi, %rax + cmp $0x20, %rdx + jb 1f + + /* Decide forward/backward copy mode */ + cmp %rdi, %rsi + jb 2f + + /* + * movsq instruction have many startup latency + * so we handle small size by general register. + */ + cmp $680, %rdx + jb 3f + /* + * movsq instruction is only good for aligned case. + */ + + cmpb %dil, %sil + je 4f +3: + sub $0x20, %rdx + /* + * We gobble 32byts forward in each loop. + */ +5: + sub $0x20, %rdx + movq 0*8(%rsi), %r11 + movq 1*8(%rsi), %r10 + movq 2*8(%rsi), %r9 + movq 3*8(%rsi), %r8 + leaq 4*8(%rsi), %rsi + + movq %r11, 0*8(%rdi) + movq %r10, 1*8(%rdi) + movq %r9, 2*8(%rdi) + movq %r8, 3*8(%rdi) + leaq 4*8(%rdi), %rdi + jae 5b + addq $0x20, %rdx + jmp 1f + /* + * Handle data forward by movsq. + */ + .p2align 4 +4: + movq %rdx, %rcx + movq -8(%rsi, %rdx), %r11 + lea -8(%rdi, %rdx), %r10 + shrq $3, %rcx + rep movsq + movq %r11, (%r10) + jmp 13f + /* + * Handle data backward by movsq. + */ + .p2align 4 +7: + movq %rdx, %rcx + movq (%rsi), %r11 + movq %rdi, %r10 + leaq -8(%rsi, %rdx), %rsi + leaq -8(%rdi, %rdx), %rdi + shrq $3, %rcx + std + rep movsq + cld + movq %r11, (%r10) + jmp 13f + + /* + * Start to prepare for backward copy. + */ + .p2align 4 +2: + cmp $680, %rdx + jb 6f + cmp %dil, %sil + je 7b +6: + /* + * Calculate copy position to tail. + */ + addq %rdx, %rsi + addq %rdx, %rdi + subq $0x20, %rdx + /* + * We gobble 32byts backward in each loop. + */ +8: + subq $0x20, %rdx + movq -1*8(%rsi), %r11 + movq -2*8(%rsi), %r10 + movq -3*8(%rsi), %r9 + movq -4*8(%rsi), %r8 + leaq -4*8(%rsi), %rsi + + movq %r11, -1*8(%rdi) + movq %r10, -2*8(%rdi) + movq %r9, -3*8(%rdi) + movq %r8, -4*8(%rdi) + leaq -4*8(%rdi), %rdi + jae 8b + /* + * Calculate copy position to head. + */ + addq $0x20, %rdx + subq %rdx, %rsi + subq %rdx, %rdi +1: + cmpq $16, %rdx + jb 9f + /* + * Move data from 16 bytes to 31 bytes. + */ + movq 0*8(%rsi), %r11 + movq 1*8(%rsi), %r10 + movq -2*8(%rsi, %rdx), %r9 + movq -1*8(%rsi, %rdx), %r8 + movq %r11, 0*8(%rdi) + movq %r10, 1*8(%rdi) + movq %r9, -2*8(%rdi, %rdx) + movq %r8, -1*8(%rdi, %rdx) + jmp 13f + .p2align 4 +9: + cmpq $8, %rdx + jb 10f + /* + * Move data from 8 bytes to 15 bytes. + */ + movq 0*8(%rsi), %r11 + movq -1*8(%rsi, %rdx), %r10 + movq %r11, 0*8(%rdi) + movq %r10, -1*8(%rdi, %rdx) + jmp 13f +10: + cmpq $4, %rdx + jb 11f + /* + * Move data from 4 bytes to 7 bytes. + */ + movl (%rsi), %r11d + movl -4(%rsi, %rdx), %r10d + movl %r11d, (%rdi) + movl %r10d, -4(%rdi, %rdx) + jmp 13f +11: + cmp $2, %rdx + jb 12f + /* + * Move data from 2 bytes to 3 bytes. + */ + movw (%rsi), %r11w + movw -2(%rsi, %rdx), %r10w + movw %r11w, (%rdi) + movw %r10w, -2(%rdi, %rdx) + jmp 13f +12: + cmp $1, %rdx + jb 13f + /* + * Move data for 1 byte. + */ + movb (%rsi), %r11b + movb %r11b, (%rdi) +13: + retq + CFI_ENDPROC +ENDPROC(memmove) diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c deleted file mode 100644 index 6d0f0ec41b3..00000000000 --- a/arch/x86/lib/memmove_64.c +++ /dev/null @@ -1,192 +0,0 @@ -/* Normally compiler builtins are used, but sometimes the compiler calls out - of line code. Based on asm-i386/string.h. - */ -#define _STRING_C -#include -#include - -#undef memmove -void *memmove(void *dest, const void *src, size_t count) -{ - unsigned long d0,d1,d2,d3,d4,d5,d6,d7; - char *ret; - - __asm__ __volatile__( - /* Handle more 32bytes in loop */ - "mov %2, %3\n\t" - "cmp $0x20, %0\n\t" - "jb 1f\n\t" - - /* Decide forward/backward copy mode */ - "cmp %2, %1\n\t" - "jb 2f\n\t" - - /* - * movsq instruction have many startup latency - * so we handle small size by general register. - */ - "cmp $680, %0\n\t" - "jb 3f\n\t" - /* - * movsq instruction is only good for aligned case. - */ - "cmpb %%dil, %%sil\n\t" - "je 4f\n\t" - "3:\n\t" - "sub $0x20, %0\n\t" - /* - * We gobble 32byts forward in each loop. - */ - "5:\n\t" - "sub $0x20, %0\n\t" - "movq 0*8(%1), %4\n\t" - "movq 1*8(%1), %5\n\t" - "movq 2*8(%1), %6\n\t" - "movq 3*8(%1), %7\n\t" - "leaq 4*8(%1), %1\n\t" - - "movq %4, 0*8(%2)\n\t" - "movq %5, 1*8(%2)\n\t" - "movq %6, 2*8(%2)\n\t" - "movq %7, 3*8(%2)\n\t" - "leaq 4*8(%2), %2\n\t" - "jae 5b\n\t" - "addq $0x20, %0\n\t" - "jmp 1f\n\t" - /* - * Handle data forward by movsq. - */ - ".p2align 4\n\t" - "4:\n\t" - "movq %0, %8\n\t" - "movq -8(%1, %0), %4\n\t" - "lea -8(%2, %0), %5\n\t" - "shrq $3, %8\n\t" - "rep movsq\n\t" - "movq %4, (%5)\n\t" - "jmp 13f\n\t" - /* - * Handle data backward by movsq. - */ - ".p2align 4\n\t" - "7:\n\t" - "movq %0, %8\n\t" - "movq (%1), %4\n\t" - "movq %2, %5\n\t" - "leaq -8(%1, %0), %1\n\t" - "leaq -8(%2, %0), %2\n\t" - "shrq $3, %8\n\t" - "std\n\t" - "rep movsq\n\t" - "cld\n\t" - "movq %4, (%5)\n\t" - "jmp 13f\n\t" - - /* - * Start to prepare for backward copy. - */ - ".p2align 4\n\t" - "2:\n\t" - "cmp $680, %0\n\t" - "jb 6f \n\t" - "cmp %%dil, %%sil\n\t" - "je 7b \n\t" - "6:\n\t" - /* - * Calculate copy position to tail. - */ - "addq %0, %1\n\t" - "addq %0, %2\n\t" - "subq $0x20, %0\n\t" - /* - * We gobble 32byts backward in each loop. - */ - "8:\n\t" - "subq $0x20, %0\n\t" - "movq -1*8(%1), %4\n\t" - "movq -2*8(%1), %5\n\t" - "movq -3*8(%1), %6\n\t" - "movq -4*8(%1), %7\n\t" - "leaq -4*8(%1), %1\n\t" - - "movq %4, -1*8(%2)\n\t" - "movq %5, -2*8(%2)\n\t" - "movq %6, -3*8(%2)\n\t" - "movq %7, -4*8(%2)\n\t" - "leaq -4*8(%2), %2\n\t" - "jae 8b\n\t" - /* - * Calculate copy position to head. - */ - "addq $0x20, %0\n\t" - "subq %0, %1\n\t" - "subq %0, %2\n\t" - "1:\n\t" - "cmpq $16, %0\n\t" - "jb 9f\n\t" - /* - * Move data from 16 bytes to 31 bytes. - */ - "movq 0*8(%1), %4\n\t" - "movq 1*8(%1), %5\n\t" - "movq -2*8(%1, %0), %6\n\t" - "movq -1*8(%1, %0), %7\n\t" - "movq %4, 0*8(%2)\n\t" - "movq %5, 1*8(%2)\n\t" - "movq %6, -2*8(%2, %0)\n\t" - "movq %7, -1*8(%2, %0)\n\t" - "jmp 13f\n\t" - ".p2align 4\n\t" - "9:\n\t" - "cmpq $8, %0\n\t" - "jb 10f\n\t" - /* - * Move data from 8 bytes to 15 bytes. - */ - "movq 0*8(%1), %4\n\t" - "movq -1*8(%1, %0), %5\n\t" - "movq %4, 0*8(%2)\n\t" - "movq %5, -1*8(%2, %0)\n\t" - "jmp 13f\n\t" - "10:\n\t" - "cmpq $4, %0\n\t" - "jb 11f\n\t" - /* - * Move data from 4 bytes to 7 bytes. - */ - "movl (%1), %4d\n\t" - "movl -4(%1, %0), %5d\n\t" - "movl %4d, (%2)\n\t" - "movl %5d, -4(%2, %0)\n\t" - "jmp 13f\n\t" - "11:\n\t" - "cmp $2, %0\n\t" - "jb 12f\n\t" - /* - * Move data from 2 bytes to 3 bytes. - */ - "movw (%1), %4w\n\t" - "movw -2(%1, %0), %5w\n\t" - "movw %4w, (%2)\n\t" - "movw %5w, -2(%2, %0)\n\t" - "jmp 13f\n\t" - "12:\n\t" - "cmp $1, %0\n\t" - "jb 13f\n\t" - /* - * Move data for 1 byte. - */ - "movb (%1), %4b\n\t" - "movb %4b, (%2)\n\t" - "13:\n\t" - : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) , - "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7) - :"0" (count), - "1" (src), - "2" (dest) - :"memory"); - - return ret; - -} -EXPORT_SYMBOL(memmove); -- cgit v1.2.3-70-g09d2 From b9ec40af0e18fb7d02106be148036c2ea490fdf9 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 28 Feb 2011 11:02:24 +0100 Subject: percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b instructions. -tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and other cosmetic changes. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 48 +++++++++++++++++++++++++++++++++++ arch/x86/lib/Makefile | 1 + arch/x86/lib/cmpxchg16b_emu.S | 59 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 arch/x86/lib/cmpxchg16b_emu.S (limited to 'arch/x86/lib') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3788f4649db..260ac7af1fd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -451,6 +451,26 @@ do { \ #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #endif /* !CONFIG_M386 */ +#ifdef CONFIG_X86_CMPXCHG64 +#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ +({ \ + char __ret; \ + typeof(o1) __o1 = o1; \ + typeof(o1) __n1 = n1; \ + typeof(o2) __o2 = o2; \ + typeof(o2) __n2 = n2; \ + typeof(o2) __dummy = n2; \ + asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ + : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ + : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ + __ret; \ +}) + +#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) +#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) +#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) +#endif /* CONFIG_X86_CMPXCHG64 */ + /* * Per cpu atomic 64 bit operations are only available under 64 bit. * 32 bit must fall back to generic operations. @@ -480,6 +500,34 @@ do { \ #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) + +/* + * Pretty complex macro to generate cmpxchg16 instruction. The instruction + * is not supported on early AMD64 processors so we must be able to emulate + * it in software. The address used in the cmpxchg16 instruction must be + * aligned to a 16 byte boundary. + */ +#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ +({ \ + char __ret; \ + typeof(o1) __o1 = o1; \ + typeof(o1) __n1 = n1; \ + typeof(o2) __o2 = o2; \ + typeof(o2) __n2 = n2; \ + typeof(o2) __dummy; \ + alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ + X86_FEATURE_CX16, \ + ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ + "S" (&pcp1), "b"(__n1), "c"(__n2), \ + "a"(__o1), "d"(__o2)); \ + __ret; \ +}) + +#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) +#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) +#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) + #endif /* This is not atomic against other CPUs -- CPU preemption needs to be off */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede..f2479f19ddd 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -42,4 +42,5 @@ else lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o + lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 00000000000..3e8b08a6de2 --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -0,0 +1,59 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + */ +#include +#include +#include +#include + +.text + +/* + * Inputs: + * %rsi : memory location to compare + * %rax : low 64 bits of old value + * %rdx : high 64 bits of old value + * %rbx : low 64 bits of new value + * %rcx : high 64 bits of new value + * %al : Operation successful + */ +ENTRY(this_cpu_cmpxchg16b_emu) +CFI_STARTPROC + +# +# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not +# via the ZF. Caller will access %al to get result. +# +# Note that this is only useful for a cpuops operation. Meaning that we +# do *not* have a fully atomic operation but just an operation that is +# *atomic* on a single cpu (as provided by the this_cpu_xx class of +# macros). +# +this_cpu_cmpxchg16b_emu: + pushf + cli + + cmpq %gs:(%rsi), %rax + jne not_same + cmpq %gs:8(%rsi), %rdx + jne not_same + + movq %rbx, %gs:(%rsi) + movq %rcx, %gs:8(%rsi) + + popf + mov $1, %al + ret + + not_same: + popf + xor %al,%al + ret + +CFI_ENDPROC + +ENDPROC(this_cpu_cmpxchg16b_emu) -- cgit v1.2.3-70-g09d2 From 39f2205e1abd1b6fffdaf45e1f1c3049a5f8999c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 28 Feb 2011 15:31:59 +0000 Subject: x86-64: Add CFI annotations to lib/rwsem_64.S These weren't part of the initial commit of this code. Signed-off-by: Jan Beulich Cc: Alexander van Heukelum LKML-Reference: <4D6BCDFF02000078000341B0@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/rwsem_64.S | 56 ++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 41fcf00e49d..67743977398 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S @@ -23,43 +23,50 @@ #include #define save_common_regs \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rcx; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11 + pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ + pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ + pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ + pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ + pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ + pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ + pushq_cfi %r11; CFI_REL_OFFSET r11, 0 #define restore_common_regs \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rcx; \ - popq %rsi; \ - popq %rdi + popq_cfi %r11; CFI_RESTORE r11; \ + popq_cfi %r10; CFI_RESTORE r10; \ + popq_cfi %r9; CFI_RESTORE r9; \ + popq_cfi %r8; CFI_RESTORE r8; \ + popq_cfi %rcx; CFI_RESTORE rcx; \ + popq_cfi %rsi; CFI_RESTORE rsi; \ + popq_cfi %rdi; CFI_RESTORE rdi /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC save_common_regs - pushq %rdx + pushq_cfi %rdx + CFI_REL_OFFSET rdx, 0 movq %rax,%rdi call rwsem_down_read_failed - popq %rdx + popq_cfi %rdx + CFI_RESTORE rdx restore_common_regs ret - ENDPROC(call_rwsem_down_read_failed) + CFI_ENDPROC +ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC save_common_regs movq %rax,%rdi call rwsem_down_write_failed restore_common_regs ret - ENDPROC(call_rwsem_down_write_failed) + CFI_ENDPROC +ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) + CFI_STARTPROC decl %edx /* do nothing if still outstanding active readers */ jnz 1f save_common_regs @@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake) call rwsem_wake restore_common_regs 1: ret - ENDPROC(call_rwsem_wake) + CFI_ENDPROC +ENDPROC(call_rwsem_wake) /* Fix up special calling conventions */ ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC save_common_regs - pushq %rdx + pushq_cfi %rdx + CFI_REL_OFFSET rdx, 0 movq %rax,%rdi call rwsem_downgrade_wake - popq %rdx + popq_cfi %rdx + CFI_RESTORE rdx restore_common_regs ret - ENDPROC(call_rwsem_downgrade_wake) + CFI_ENDPROC +ENDPROC(call_rwsem_downgrade_wake) -- cgit v1.2.3-70-g09d2 From 60cf637a13932a4750da6746efd0199e8a4c341b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 28 Feb 2011 15:54:40 +0000 Subject: x86: Use {push,pop}_cfi in more places Cleaning up and shortening code... Signed-off-by: Jan Beulich Cc: Alexander van Heukelum LKML-Reference: <4D6BD35002000078000341DA@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 27 ++++++------------ arch/x86/include/asm/frame.h | 6 ++-- arch/x86/kernel/entry_32.S | 3 +- arch/x86/lib/atomic64_386_32.S | 6 ++-- arch/x86/lib/atomic64_cx8_32.S | 6 ++-- arch/x86/lib/checksum_32.S | 63 ++++++++++++++---------------------------- arch/x86/lib/semaphore_32.S | 36 ++++++++---------------- 7 files changed, 49 insertions(+), 98 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 518bb99c339..7c6aabd01fc 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -126,26 +126,20 @@ ENTRY(ia32_sysenter_target) */ ENABLE_INTERRUPTS(CLBR_NONE) movl %ebp,%ebp /* zero extension */ - pushq $__USER32_DS - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi $__USER32_DS /*CFI_REL_OFFSET ss,0*/ - pushq %rbp - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rbp CFI_REL_OFFSET rsp,0 - pushfq - CFI_ADJUST_CFA_OFFSET 8 + pushfq_cfi /*CFI_REL_OFFSET rflags,0*/ movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d CFI_REGISTER rip,r10 - pushq $__USER32_CS - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi $__USER32_CS /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax - pushq %r10 - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %r10 CFI_REL_OFFSET rip,0 - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rax cld SAVE_ARGS 0,0,1 /* no need to do an access_ok check here because rbp has been @@ -182,11 +176,9 @@ sysexit_from_sys_call: xorq %r9,%r9 xorq %r10,%r10 xorq %r11,%r11 - popfq - CFI_ADJUST_CFA_OFFSET -8 + popfq_cfi /*CFI_RESTORE rflags*/ - popq %rcx /* User %esp */ - CFI_ADJUST_CFA_OFFSET -8 + popq_cfi %rcx /* User %esp */ CFI_REGISTER rsp,rcx TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 @@ -421,8 +413,7 @@ ENTRY(ia32_syscall) */ ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%eax - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rax cld /* note the registers are not zero extended to the sf. this could be a problem. */ diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 06850a7194e..2c6fc9e6281 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -7,14 +7,12 @@ frame pointer later */ #ifdef CONFIG_FRAME_POINTER .macro FRAME - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebp CFI_REL_OFFSET ebp,0 movl %esp,%ebp .endm .macro ENDFRAME - popl %ebp - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebp CFI_RESTORE ebp .endm #else diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 49bdedd2dbc..2878821cb8c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1409,8 +1409,7 @@ END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) RING0_EC_FRAME - pushl $do_async_page_fault - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC END(apf_page_fault) diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 2cda60a06e6..e8e7e0d06f4 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -15,14 +15,12 @@ /* if you want SMP support, implement these with real spinlocks */ .macro LOCK reg - pushfl - CFI_ADJUST_CFA_OFFSET 4 + pushfl_cfi cli .endm .macro UNLOCK reg - popfl - CFI_ADJUST_CFA_OFFSET -4 + popfl_cfi .endm #define BEGIN(op) \ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 71e080de335..391a083674b 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -14,14 +14,12 @@ #include .macro SAVE reg - pushl %\reg - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %\reg CFI_REL_OFFSET \reg, 0 .endm .macro RESTORE reg - popl %\reg - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %\reg CFI_RESTORE \reg .endm diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index adbccd0bbb7..78d16a554db 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) */ ENTRY(csum_partial) CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len @@ -132,11 +130,9 @@ ENTRY(csum_partial) jz 8f roll $8, %eax 8: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi ret CFI_ENDPROC @@ -148,11 +144,9 @@ ENDPROC(csum_partial) ENTRY(csum_partial) CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len @@ -260,11 +254,9 @@ ENTRY(csum_partial) jz 90f roll $8, %eax 90: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi ret CFI_ENDPROC @@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic) CFI_STARTPROC subl $4,%esp CFI_ADJUST_CFA_OFFSET 4 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edi CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len @@ -426,17 +415,13 @@ DST( movb %cl, (%edi) ) .previous - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edi CFI_RESTORE edi - popl %ecx # equivalent to addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx # equivalent to addl $4,%esp ret CFI_ENDPROC ENDPROC(csum_partial_copy_generic) @@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic) ENTRY(csum_partial_copy_generic) CFI_STARTPROC - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edi CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst @@ -527,14 +509,11 @@ DST( movb %dl, (%edi) ) jmp 7b .previous - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edi CFI_RESTORE edi - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx ret CFI_ENDPROC diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 648fe474178..48e44f7ed76 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -74,29 +74,23 @@ ENTRY(__read_lock_failed) /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edx CFI_REL_OFFSET edx,0 call rwsem_down_read_failed - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edx + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 calll rwsem_down_write_failed - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_down_write_failed) @@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake) CFI_STARTPROC decw %dx /* do nothing if still outstanding active readers */ jnz 1f - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 call rwsem_wake - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx 1: ret CFI_ENDPROC ENDPROC(call_rwsem_wake) @@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake) /* Fix up special calling conventions */ ENTRY(call_rwsem_downgrade_wake) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edx CFI_REL_OFFSET edx,0 call rwsem_downgrade_wake - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edx + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) -- cgit v1.2.3-70-g09d2 From 039e13890b0615cb8c5c04b6afa84d676e24c761 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 28 Feb 2011 15:56:00 +0000 Subject: x86: Remove unused bits from lib/thunk_*.S Some of the items removed were apparently never used, others simply didn't get removed with their last user. Signed-off-by: Jan Beulich LKML-Reference: <4D6BD3A002000078000341F1@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/thunk_32.S | 18 ------------------ arch/x86/lib/thunk_64.S | 27 --------------------------- 2 files changed, 45 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 650b11e00ec..2930ae05d77 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -7,24 +7,6 @@ #include -#define ARCH_TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#define ARCH_TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ .macro thunk_ra name,func diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index bf9a7d5a542..782b082c9ff 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -22,26 +22,6 @@ CFI_ENDPROC .endm - /* rdi: arg1 ... normal C conventions. rax is passed from C. */ - .macro thunk_retrax name,func - .globl \name -\name: - CFI_STARTPROC - SAVE_ARGS - call \func - jmp restore_norax - CFI_ENDPROC - .endm - - - .section .sched.text, "ax" -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM - thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed - thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed - thunk rwsem_wake_thunk,rwsem_wake - thunk rwsem_downgrade_thunk,rwsem_downgrade_wake -#endif - #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in rdi (arg1) */ .macro thunk_ra name,func @@ -72,10 +52,3 @@ restore: RESTORE_ARGS ret CFI_ENDPROC - - CFI_STARTPROC - SAVE_ARGS -restore_norax: - RESTORE_ARGS 1 - ret - CFI_ENDPROC -- cgit v1.2.3-70-g09d2 From e938c287ea8d977e079f07464ac69923412663ce Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 1 Mar 2011 14:28:02 +0000 Subject: x86: Fix a bogus unwind annotation in lib/semaphore_32.S 'simple' would have required specifying current frame address and return address location manually, but that's obviously not the case (and not necessary) here. Signed-off-by: Jan Beulich LKML-Reference: <4D6D1082020000780003454C@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/lib/semaphore_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 48e44f7ed76..06691daa410 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -36,7 +36,7 @@ */ #ifdef CONFIG_SMP ENTRY(__write_lock_failed) - CFI_STARTPROC simple + CFI_STARTPROC FRAME 2: LOCK_PREFIX addl $ RW_LOCK_BIAS,(%eax) -- cgit v1.2.3-70-g09d2