From 5136dea5734cfddbc6d7ccb7ead85a3ac7ce3de2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 May 2008 16:10:41 -0700 Subject: x86: bitops take an unsigned long * All (or most) other architectures do this. So should x86. Fix. Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/bitops.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'include/asm-x86/bitops.h') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index ee4b3ead6a4..7d2494bdc66 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -43,7 +43,7 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -57,7 +57,7 @@ static inline void set_bit(int nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __set_bit(int nr, volatile void *addr) +static inline void __set_bit(int nr, volatile unsigned long *addr) { asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } @@ -72,7 +72,7 @@ static inline void __set_bit(int nr, volatile void *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); } @@ -85,13 +85,13 @@ static inline void clear_bit(int nr, volatile void *addr) * clear_bit() is atomic and implies release semantics before the memory * operation. It can be used for an unlock. */ -static inline void clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); clear_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void *addr) +static inline void __clear_bit(int nr, volatile unsigned long *addr) { asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); } @@ -108,7 +108,7 @@ static inline void __clear_bit(int nr, volatile void *addr) * No memory barrier is required here, because x86 cannot reorder stores past * older loads. Same principle as spin_unlock. */ -static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { barrier(); __clear_bit(nr, addr); @@ -126,7 +126,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(int nr, volatile unsigned long *addr) { asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); } @@ -140,7 +140,7 @@ static inline void __change_bit(int nr, volatile void *addr) * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void change_bit(int nr, volatile void *addr) +static inline void change_bit(int nr, volatile unsigned long *addr) { asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); } @@ -153,7 +153,7 @@ static inline void change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -170,7 +170,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile void *addr) +static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -184,7 +184,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile void *addr) +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -203,7 +203,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -223,7 +223,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile void *addr) +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -235,7 +235,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, volatile void *addr) +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -255,7 +255,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) { int oldbit; @@ -266,13 +266,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return oldbit; } -static inline int constant_test_bit(int nr, const volatile void *addr) +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; } -static inline int variable_test_bit(int nr, volatile const void *addr) +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) { int oldbit; -- cgit v1.2.3-70-g09d2 From 1a750e0cd7a30c478723ecfa1df685efcdd38a90 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Jun 2008 21:03:26 -0700 Subject: x86, bitops: make constant-bit set/clear_bit ops faster On Wed, 18 Jun 2008, Linus Torvalds wrote: > > And yes, the "lock andl" should be noticeably faster than the xchgl. I dunno. Here's a untested (!!) patch that turns constant-bit set/clear_bit ops into byte mask ops (lock orb/andb). It's not exactly pretty. The reason for using the byte versions is that a locked op is serialized in the memory pipeline anyway, so there are no forwarding issues (that could slow down things when we access things with different sizes), and the byte ops are a lot smaller than 32-bit and particularly 64-bit ops (big constants, and the 64-bit ops need the REX prefix byte too). [ Side note: I wonder if we should turn the "test_bit()" C version into a "char *" version too.. It could actually help with alias analysis, since char pointers can alias anything. So it might be the RightThing(tm) to do for multiple reasons. I dunno. It's a separate issue. ] It does actually shrink the kernel image a bit (a couple of hundred bytes on the text segment for my everything-compiled-in image), and while it's totally untested the (admittedly few) code generation points I looked at seemed sane. And "lock orb" should be noticeably faster than "lock bts". If somebody wants to play with it, go wild. I didn't do "change_bit()", because nobody sane uses that thing anyway. I guarantee nothing. And if it breaks, nobody saw me do anything. You can't prove this email wasn't sent by somebody who is good at forging smtp. This does require a gcc that is recent enough for "__builtin_constant_p()" to work in an inline function, but I suspect our kernel requirements are already higher than that. And if you do have an old gcc that is supported, the worst that would happen is that the optimization doesn't trigger. Signed-off-by: Ingo Molnar --- include/asm-x86/bitops.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include/asm-x86/bitops.h') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 7d2494bdc66..ab7635a4acd 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -23,11 +23,22 @@ #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) /* Technically wrong, but this avoids compilation errors on some gcc versions. */ -#define ADDR "=m" (*(volatile long *) addr) +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) #else -#define ADDR "+m" (*(volatile long *) addr) +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) #endif +#define ADDR BITOP_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define IS_IMMEDIATE(nr) \ + (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR BITOP_ADDR(addr + (nr>>3)) +#define CONST_MASK (1 << (nr & 7)) + /** * set_bit - Atomically set a bit in memory * @nr: the bit to set @@ -43,11 +54,15 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(int nr, volatile unsigned long *addr) +static inline void set_bit(unsigned int nr, volatile unsigned long *addr) { - asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + if (IS_IMMEDIATE(nr)) + asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR : "i" (CONST_MASK) : "memory"); + else + asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); } + /** * __set_bit - Set a bit in memory * @nr: the bit to set @@ -74,7 +89,10 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) */ static inline void clear_bit(int nr, volatile unsigned long *addr) { - asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); + if (IS_IMMEDIATE(nr)) + asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR : "i" (~CONST_MASK)); + else + asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); } /* -- cgit v1.2.3-70-g09d2 From 7dbceaf9bb68919651901b101f44edd5391ee489 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Jun 2008 07:28:24 +0200 Subject: x86, bitops: make constant-bit set/clear_bit ops faster, adapt, clean up fix integration bug introduced by "x86: bitops take an unsigned long *" which turned "(void *) + x" into "(long *) + x". small cleanups to make it more apparent which value get propagated where. Signed-off-by: Ingo Molnar --- include/asm-x86/bitops.h | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'include/asm-x86/bitops.h') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index ab7635a4acd..6c505481971 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -28,16 +28,15 @@ #define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) #endif -#define ADDR BITOP_ADDR(addr) +#define ADDR BITOP_ADDR(addr) /* * We do the locked ops that don't return the old value as * a mask operation on a byte. */ -#define IS_IMMEDIATE(nr) \ - (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR BITOP_ADDR(addr + (nr>>3)) -#define CONST_MASK (1 << (nr & 7)) +#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) /** * set_bit - Atomically set a bit in memory @@ -56,13 +55,17 @@ */ static inline void set_bit(unsigned int nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) - asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR : "i" (CONST_MASK) : "memory"); - else - asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "orb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "i" (CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX "bts %1,%0" + : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + } } - /** * __set_bit - Set a bit in memory * @nr: the bit to set @@ -89,10 +92,15 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) */ static inline void clear_bit(int nr, volatile unsigned long *addr) { - if (IS_IMMEDIATE(nr)) - asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR : "i" (~CONST_MASK)); - else - asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "andb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "i" (~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btr %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } } /* -- cgit v1.2.3-70-g09d2 From 437a0a54eea7b101e8a5b70688009956f6522ed0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Jun 2008 21:50:20 +0200 Subject: x86, bitops: make constant-bit set/clear_bit ops faster, gcc workaround Jeremy Fitzhardinge reported this compiler bug: Suggestion from Linus: add "r" to the input constraint of the set_bit()/clear_bit()'s constant 'nr' branch: Blows up on "gcc version 3.4.4 20050314 (prerelease) (Debian 3.4.3-13)": CC init/main.o include2/asm/bitops.h: In function `start_kernel': include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints include2/asm/bitops.h:59: warning: asm operand 1 probably doesn't match constraints include2/asm/bitops.h:59: error: impossible constraint in `asm' include2/asm/bitops.h:59: error: impossible constraint in `asm' include2/asm/bitops.h:59: error: impossible constraint in `asm' Reported-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/asm-x86/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-x86/bitops.h') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 6c505481971..96b1829cea1 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -58,7 +58,7 @@ static inline void set_bit(unsigned int nr, volatile unsigned long *addr) if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR(nr, addr) - : "i" (CONST_MASK(nr)) + : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX "bts %1,%0" @@ -95,7 +95,7 @@ static inline void clear_bit(int nr, volatile unsigned long *addr) if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR(nr, addr) - : "i" (~CONST_MASK(nr))); + : "iq" ((u8)~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX "btr %1,%0" : BITOP_ADDR(addr) -- cgit v1.2.3-70-g09d2