summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2010-08-12 16:28:09 +0000
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-11-29 15:48:17 +1100
commit64ff31287693c1f325cb9cb049569c1611438ef1 (patch)
tree3c159d2ca6f967fca13bae17cff19f92e0b3896c /arch/powerpc/include
parent72083646528d4887b920deb71b37e09bc7d227bb (diff)
powerpc: Add support for popcnt instructions
POWER5 added popcntb, and POWER7 added popcntw and popcntd. As a first step this patch does all the work out of line, but it would be nice to implement them as inlines with an out of line fallback. The performance issue with hweight was noticed when disabling SMT on a large (192 thread) POWER7 box. The patch improves that testcase by about 8%. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/include')
-rw-r--r--arch/powerpc/include/asm/bitops.h9
-rw-r--r--arch/powerpc/include/asm/cputable.h9
2 files changed, 15 insertions, 3 deletions
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 30964ae2d09..8a7e9314c68 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -267,7 +267,16 @@ static __inline__ int fls64(__u64 x)
#include <asm-generic/bitops/fls64.h>
#endif /* __powerpc64__ */
+#ifdef CONFIG_PPC64
+unsigned int __arch_hweight8(unsigned int w);
+unsigned int __arch_hweight16(unsigned int w);
+unsigned int __arch_hweight32(unsigned int w);
+unsigned long __arch_hweight64(__u64 w);
+#include <asm-generic/bitops/const_hweight.h>
+#else
#include <asm-generic/bitops/hweight.h>
+#endif
+
#include <asm-generic/bitops/find.h>
/* Little-endian versions */
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index f3a1fdd9cf0..f0a211d9692 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -199,6 +199,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000)
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000)
+#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0400000000000000)
+#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0800000000000000)
#ifndef __ASSEMBLY__
@@ -403,21 +405,22 @@ extern const char *powerpc_base_platform;
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
- CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
+ CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \