summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOlof Johansson <olof@lixom.net>2007-03-22 09:34:13 -0500
committerPaul Mackerras <paulus@samba.org>2007-04-13 03:55:13 +1000
commit3467bfd340f9ad48f3732415533a2e9c18240b62 (patch)
tree91f57918199d9508868aa0889a5b2aca4cc1da13
parent569975591c5530fdc9c7a3c45122e5e46f075a74 (diff)
[POWERPC] Use mtocrf instruction in asm when CONFIG_POWER4_ONLY=y
mtocrf is a faster single-field mtcrf (move to condition register fields) instruction available in POWER4 and later processors. It can make quite a difference in performance on some implementations, so use it for CONFIG_POWER4_ONLY builds. Signed-off-by: Olof Johansson <olof@lixom.net> Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--arch/powerpc/lib/copyuser_64.S6
-rw-r--r--arch/powerpc/lib/mem_64.S6
-rw-r--r--arch/powerpc/lib/memcpy_64.S6
-rw-r--r--include/asm-powerpc/asm-compat.h10
4 files changed, 19 insertions, 9 deletions
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a6b54cb97c4..25ec5378afa 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
blt cr1,.Lshort_copy
bne .Ldst_unaligned
.Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 68df20283ff..11ce045e21f 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
bdnz 4b
5: srwi. r0,r5,3
clrlwi r5,r5,29
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
beq 8f
bf 29,6f
std r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
std r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
- mtcrf 1,r5
+ PPC_MTOCRF 1,r5
beqlr+
bf 29,9f
stw r4,0(r6)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 7173ba98f42..3f131129d1c 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
.align 7
_GLOBAL(memcpy)
std r3,48(r1) /* save destination pointer for return value */
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7
+ PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
diff --git a/include/asm-powerpc/asm-compat.h b/include/asm-powerpc/asm-compat.h
index c89bd58ee28..c19e7367fce 100644
--- a/include/asm-powerpc/asm-compat.h
+++ b/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
#define PPC_STLCX stringify_in_c(stdcx.)
#define PPC_CNTLZL stringify_in_c(cntlzd)
+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF stringify_in_c(mtcrf)
+#endif
+
#else /* 32-bit */
/* operations for longs and pointers */
@@ -89,6 +98,7 @@
#define PPC_LLARX stringify_in_c(lwarx)
#define PPC_STLCX stringify_in_c(stwcx.)
#define PPC_CNTLZL stringify_in_c(cntlzw)
+#define PPC_MTOCRF stringify_in_c(mtcrf)
#endif