From a7e926abc3adfbd2e5e20d2b46177adb4e313915 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:25 +0100 Subject: x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellanous changes and improvements Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/lib/atomic64_386_32.S | 175 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 arch/x86/lib/atomic64_386_32.S (limited to 'arch/x86/lib/atomic64_386_32.S') diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 00000000000..5db07fe4a0c --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S @@ -0,0 +1,175 @@ +/* + * atomic64_t for 386/486 + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include + +/* if you want SMP support, implement these with real spinlocks */ +.macro LOCK reg + pushfl + CFI_ADJUST_CFA_OFFSET 4 + cli +.endm + +.macro UNLOCK reg + popfl + CFI_ADJUST_CFA_OFFSET -4 +.endm + +.macro BEGIN func reg +$v = \reg + +ENTRY(atomic64_\func\()_386) + CFI_STARTPROC + LOCK $v + +.macro RETURN + UNLOCK $v + ret +.endm + +.macro END_ + CFI_ENDPROC +ENDPROC(atomic64_\func\()_386) +.purgem RETURN +.purgem END_ +.purgem END +.endm + +.macro END +RETURN +END_ +.endm +.endm + +BEGIN read %ecx + movl ($v), %eax + movl 4($v), %edx +END + +BEGIN set %esi + movl %ebx, ($v) + movl %ecx, 4($v) +END + +BEGIN xchg %esi + movl ($v), %eax + movl 4($v), %edx + movl %ebx, ($v) + movl %ecx, 4($v) +END + +BEGIN add %ecx + addl %eax, ($v) + adcl %edx, 4($v) +END + +BEGIN add_return %ecx + addl ($v), %eax + adcl 4($v), %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN sub %ecx + subl %eax, ($v) + sbbl %edx, 4($v) +END + +BEGIN sub_return %ecx + negl %edx + negl %eax + sbbl $0, %edx + addl ($v), %eax + adcl 4($v), %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN inc %esi + addl $1, ($v) + adcl $0, 4($v) +END + +BEGIN inc_return %esi + movl ($v), %eax + movl 4($v), %edx + addl $1, %eax + adcl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN dec %esi + subl $1, ($v) + sbbl $0, 4($v) +END + +BEGIN dec_return %esi + movl ($v), %eax + movl 4($v), %edx + subl $1, %eax + sbbl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN add_unless %ecx + addl %eax, %esi + adcl %edx, %edi + addl ($v), %eax + adcl 4($v), %edx + cmpl %eax, %esi + je 3f +1: + movl %eax, ($v) + movl %edx, 4($v) + xorl %eax, %eax +2: +RETURN +3: + cmpl %edx, %edi + jne 1b + movl $1, %eax + jmp 2b +END_ + +BEGIN inc_not_zero %esi + movl ($v), %eax + movl 4($v), %edx + testl %eax, %eax + je 3f +1: + addl $1, %eax + adcl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) + xorl %eax, %eax +2: +RETURN +3: + testl %edx, %edx + jne 1b + movl $1, %eax + jmp 2b +END_ + +BEGIN dec_if_positive %esi + movl ($v), %eax + movl 4($v), %edx + subl $1, %eax + sbbl $0, %edx + js 1f + movl %eax, ($v) + movl %edx, 4($v) +1: +END -- cgit v1.2.3-70-g09d2 From 6e6104fe085026e6ef82cc5cc303d6c8ceb7e411 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:46 +0100 Subject: x86-32: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it perfomed the add and 0 otherwise. The implementation did the opposite thing. Reported-by: H. Peter Anvin Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/lib/atomic64_386_32.S | 4 ++-- arch/x86/lib/atomic64_cx8_32.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib/atomic64_386_32.S') diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 5db07fe4a0c..a2f847c88b8 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -133,13 +133,13 @@ BEGIN add_unless %ecx 1: movl %eax, ($v) movl %edx, 4($v) - xorl %eax, %eax + movl $1, %eax 2: RETURN 3: cmpl %edx, %edi jne 1b - movl $1, %eax + xorl %eax, %eax jmp 2b END_ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index e49c4ebca9f..d0e37b189f8 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -180,7 +180,7 @@ ENTRY(atomic64_add_unless_cx8) cmpxchg8b (%ebp) jne 1b - xorl %eax, %eax + movl $1, %eax 3: addl $8, %esp CFI_ADJUST_CFA_OFFSET -8 @@ -190,7 +190,7 @@ ENTRY(atomic64_add_unless_cx8) 4: cmpl %edx, 4(%esp) jne 2b - movl $1, %eax + xorl %eax, %eax jmp 3b CFI_ENDPROC ENDPROC(atomic64_add_unless_cx8) -- cgit v1.2.3-70-g09d2 From f3e83131469e29032a700217aa394996107b8fc5 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:49 +0100 Subject: x86-32: Fix atomic64_inc_not_zero return value convention atomic64_inc_not_zero must return 1 if it perfomed the add and 0 otherwise. It was doing the opposite thing. Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-6-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- arch/x86/lib/atomic64_386_32.S | 3 +-- arch/x86/lib/atomic64_cx8_32.S | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib/atomic64_386_32.S') diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index a2f847c88b8..4a5979aa688 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -153,13 +153,12 @@ BEGIN inc_not_zero %esi adcl $0, %edx movl %eax, ($v) movl %edx, 4($v) - xorl %eax, %eax + movl $1, %eax 2: RETURN 3: testl %edx, %edx jne 1b - movl $1, %eax jmp 2b END_ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index d0e37b189f8..71e080de335 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -212,14 +212,13 @@ ENTRY(atomic64_inc_not_zero_cx8) cmpxchg8b (%esi) jne 1b - xorl %eax, %eax + movl $1, %eax 3: RESTORE ebx ret 4: testl %edx, %edx jne 2b - movl $1, %eax jmp 3b CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) -- cgit v1.2.3-70-g09d2