summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/sparc64/kernel/entry.S6
-rw-r--r--arch/sparc64/kernel/semaphore.c12
-rw-r--r--arch/sparc64/kernel/trampoline.S3
-rw-r--r--arch/sparc64/lib/U1memcpy.S103
-rw-r--r--arch/sparc64/lib/VISsave.S15
-rw-r--r--arch/sparc64/lib/atomic.S42
-rw-r--r--arch/sparc64/lib/bitops.S31
-rw-r--r--arch/sparc64/lib/debuglocks.c6
-rw-r--r--arch/sparc64/lib/dec_and_lock.S6
-rw-r--r--arch/sparc64/lib/rwsem.S15
-rw-r--r--arch/sparc64/mm/init.c6
-rw-r--r--arch/sparc64/mm/ultra.S3
-rw-r--r--include/asm-sparc64/rwsem.h3
-rw-r--r--include/asm-sparc64/spinlock.h29
-rw-r--r--include/asm-sparc64/spitfire.h1
15 files changed, 171 insertions, 110 deletions
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a47f2d0b1a2..ffe717ab7f8 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
fmuld %f0, %f2, %f26
faddd %f0, %f2, %f28
fmuld %f0, %f2, %f30
+ membar #Sync
b,pt %xcc, fpdis_exit
- membar #Sync
+ nop
2: andcc %g5, FPRS_DU, %g0
bne,pt %icc, 3f
fzero %f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
fmuld %f32, %f34, %f58
faddd %f32, %f34, %f60
fmuld %f32, %f34, %f62
+ membar #Sync
ba,pt %xcc, fpdis_exit
- membar #Sync
+ nop
3: mov SECONDARY_CONTEXT, %g3
add %g6, TI_FPREGS, %g1
ldxa [%g3] ASI_DMMU, %g5
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
index 63496c43fe1..a809e63f03e 100644
--- a/arch/sparc64/kernel/semaphore.c
+++ b/arch/sparc64/kernel/semaphore.c
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
" add %1, %4, %1\n"
" cas [%3], %0, %1\n"
" cmp %0, %1\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
: "r" (&sem->count), "r" (incr), "m" (sem->count)
: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" addcc %%g7, 1, %%g0\n"
+" membar #StoreLoad | #StoreStore\n"
" ble,pn %%icc, 3f\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n"
+" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
" cmp %%g1, %%g7\n"
" bne,pn %%icc, 1b\n"
" cmp %%g7, 1\n"
+" membar #StoreLoad | #StoreStore\n"
" bl,pn %%icc, 3f\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
"2:\n"
" .subsection 2\n"
"3: mov %2, %%g1\n"
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 2c8f9344b4e..3a145fc39cf 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -98,8 +98,9 @@ startup_continue:
sethi %hi(prom_entry_lock), %g2
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
+ membar #StoreLoad | #StoreStore
brnz,pn %g1, 1b
- membar #StoreLoad | #StoreStore
+ nop
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c718..bafd2fc07ac 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
#define LOOP_CHUNK3(src, dest, len, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
+#define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \
EX_ST(STORE_BLK(%fsrc, %dest)); \
- add %dest, 0x40, %dest;
+ add %dest, 0x40, %dest; \
+ DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \
EX_ST(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; \
- ba,pt %xcc, target;
+ ba,pt %xcc, target; \
+ nop;
#define FINISH_VISCHUNK(dest, f0, f1, left) \
subcc %left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
- STORE_JUMP(o0, f48, 40f) membar #Sync
+ STORE_JUMP(o0, f48, 40f)
2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
- STORE_JUMP(o0, f48, 48f) membar #Sync
+ STORE_JUMP(o0, f48, 48f)
3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
- STORE_JUMP(o0, f48, 56f) membar #Sync
+ STORE_JUMP(o0, f48, 56f)
1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
- STORE_JUMP(o0, f48, 41f) membar #Sync
+ STORE_JUMP(o0, f48, 41f)
2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
- STORE_JUMP(o0, f48, 49f) membar #Sync
+ STORE_JUMP(o0, f48, 49f)
3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
- STORE_JUMP(o0, f48, 57f) membar #Sync
+ STORE_JUMP(o0, f48, 57f)
1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
- STORE_JUMP(o0, f48, 42f) membar #Sync
+ STORE_JUMP(o0, f48, 42f)
2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
- STORE_JUMP(o0, f48, 50f) membar #Sync
+ STORE_JUMP(o0, f48, 50f)
3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
- STORE_JUMP(o0, f48, 58f) membar #Sync
+ STORE_JUMP(o0, f48, 58f)
1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
- STORE_JUMP(o0, f48, 43f) membar #Sync
+ STORE_JUMP(o0, f48, 43f)
2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
- STORE_JUMP(o0, f48, 51f) membar #Sync
+ STORE_JUMP(o0, f48, 51f)
3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
- STORE_JUMP(o0, f48, 59f) membar #Sync
+ STORE_JUMP(o0, f48, 59f)
1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
- STORE_JUMP(o0, f48, 44f) membar #Sync
+ STORE_JUMP(o0, f48, 44f)
2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
- STORE_JUMP(o0, f48, 52f) membar #Sync
+ STORE_JUMP(o0, f48, 52f)
3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
- STORE_JUMP(o0, f48, 60f) membar #Sync
+ STORE_JUMP(o0, f48, 60f)
1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
- STORE_JUMP(o0, f48, 45f) membar #Sync
+ STORE_JUMP(o0, f48, 45f)
2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
- STORE_JUMP(o0, f48, 53f) membar #Sync
+ STORE_JUMP(o0, f48, 53f)
3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
- STORE_JUMP(o0, f48, 61f) membar #Sync
+ STORE_JUMP(o0, f48, 61f)
1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
- STORE_JUMP(o0, f48, 46f) membar #Sync
+ STORE_JUMP(o0, f48, 46f)
2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
- STORE_JUMP(o0, f48, 54f) membar #Sync
+ STORE_JUMP(o0, f48, 54f)
3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
- STORE_JUMP(o0, f48, 62f) membar #Sync
+ STORE_JUMP(o0, f48, 62f)
1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
- STORE_JUMP(o0, f48, 47f) membar #Sync
+ STORE_JUMP(o0, f48, 47f)
2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
- STORE_JUMP(o0, f48, 55f) membar #Sync
+ STORE_JUMP(o0, f48, 55f)
3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
- STORE_SYNC(o0, f48) membar #Sync
+ STORE_SYNC(o0, f48)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
- STORE_JUMP(o0, f48, 63f) membar #Sync
+ STORE_JUMP(o0, f48, 63f)
40: FINISH_VISCHUNK(o0, f0, f2, g3)
41: FINISH_VISCHUNK(o0, f2, f4, g3)
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
index 65e328d600a..4e18989bd60 100644
--- a/arch/sparc64/lib/VISsave.S
+++ b/arch/sparc64/lib/VISsave.S
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
stda %f48, [%g3 + %g1] ASI_BLK_P
5: membar #Sync
- jmpl %g7 + %g0, %g0
+ ba,pt %xcc, 80f
+ nop
+
+ .align 32
+80: jmpl %g7 + %g0, %g0
nop
6: ldub [%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
stda %f32, [%g2 + %g1] ASI_BLK_P
stda %f48, [%g3 + %g1] ASI_BLK_P
membar #Sync
- jmpl %g7 + %g0, %g0
+ ba,pt %xcc, 80f
+ nop
+ .align 32
+80: jmpl %g7 + %g0, %g0
nop
.align 32
@@ -126,6 +133,10 @@ VISenterhalf:
stda %f0, [%g2 + %g1] ASI_BLK_P
stda %f16, [%g3 + %g1] ASI_BLK_P
membar #Sync
+ ba,pt %xcc, 4f
+ nop
+
+ .align 32
4: and %o5, FPRS_DU, %o5
jmpl %g7 + %g0, %g0
wr %o5, FPRS_FEF, %fprs
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index e528b8d1a3e..faf87c31598 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -7,18 +7,6 @@
#include <linux/config.h>
#include <asm/asi.h>
- /* On SMP we need to use memory barriers to ensure
- * correct memory operation ordering, nop these out
- * for uniprocessor.
- */
-#ifdef CONFIG_SMP
-#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad
-#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore
-#else
-#define ATOMIC_PRE_BARRIER nop
-#define ATOMIC_POST_BARRIER nop
-#endif
-
.text
/* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
nop
.size atomic_sub, .-atomic_sub
+ /* On SMP we need to use memory barriers to ensure
+ * correct memory operation ordering, nop these out
+ * for uniprocessor.
+ */
+#ifdef CONFIG_SMP
+
+#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad;
+#define ATOMIC_POST_BARRIER \
+ ba,pt %xcc, 80b; \
+ membar #StoreLoad | #StoreStore
+
+80: retl
+ nop
+#else
+#define ATOMIC_PRE_BARRIER
+#define ATOMIC_POST_BARRIER
+#endif
+
.globl atomic_add_ret
.type atomic_add_ret,#function
atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
cmp %g1, %g7
bne,pn %icc, 1b
add %g7, %o0, %g7
+ sra %g7, 0, %o0
ATOMIC_POST_BARRIER
retl
- sra %g7, 0, %o0
+ nop
.size atomic_add_ret, .-atomic_add_ret
.globl atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
cmp %g1, %g7
bne,pn %icc, 1b
sub %g7, %o0, %g7
+ sra %g7, 0, %o0
ATOMIC_POST_BARRIER
retl
- sra %g7, 0, %o0
+ nop
.size atomic_sub_ret, .-atomic_sub_ret
.globl atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
cmp %g1, %g7
bne,pn %xcc, 1b
add %g7, %o0, %g7
+ mov %g7, %o0
ATOMIC_POST_BARRIER
retl
- mov %g7, %o0
+ nop
.size atomic64_add_ret, .-atomic64_add_ret
.globl atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
cmp %g1, %g7
bne,pn %xcc, 1b
sub %g7, %o0, %g7
+ mov %g7, %o0
ATOMIC_POST_BARRIER
retl
- mov %g7, %o0
+ nop
.size atomic64_sub_ret, .-atomic64_sub_ret
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S
index 886dcd2b376..31afbfe6c1e 100644
--- a/arch/sparc64/lib/bitops.S
+++ b/arch/sparc64/lib/bitops.S
@@ -7,20 +7,26 @@
#include <linux/config.h>
#include <asm/asi.h>
+ .text
+
/* On SMP we need to use memory barriers to ensure
* correct memory operation ordering, nop these out
* for uniprocessor.
*/
+
#ifdef CONFIG_SMP
#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore
+#define BITOP_POST_BARRIER \
+ ba,pt %xcc, 80b; \
+ membar #StoreLoad | #StoreStore
+
+80: retl
+ nop
#else
-#define BITOP_PRE_BARRIER nop
-#define BITOP_POST_BARRIER nop
+#define BITOP_PRE_BARRIER
+#define BITOP_POST_BARRIER
#endif
- .text
-
.globl test_and_set_bit
.type test_and_set_bit,#function
test_and_set_bit: /* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1
bne,pn %xcc, 1b
and %g7, %o2, %g2
- BITOP_POST_BARRIER
clr %o0
+ movrne %g2, 1, %o0
+ BITOP_POST_BARRIER
retl
- movrne %g2, 1, %o0
+ nop
.size test_and_set_bit, .-test_and_set_bit
.globl test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1
bne,pn %xcc, 1b
and %g7, %o2, %g2
- BITOP_POST_BARRIER
clr %o0
+ movrne %g2, 1, %o0
+ BITOP_POST_BARRIER
retl
- movrne %g2, 1, %o0
+ nop
.size test_and_clear_bit, .-test_and_clear_bit
.globl test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */
cmp %g7, %g1
bne,pn %xcc, 1b
and %g7, %o2, %g2
- BITOP_POST_BARRIER
clr %o0
+ movrne %g2, 1, %o0
+ BITOP_POST_BARRIER
retl
- movrne %g2, 1, %o0
+ nop
.size test_and_change_bit, .-test_and_change_bit
.globl set_bit
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index c421e0c6532..f03344cf784 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -252,8 +252,9 @@ wlock_again:
" andn %%g1, %%g3, %%g7\n"
" casx [%0], %%g1, %%g7\n"
" cmp %%g1, %%g7\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%xcc, 1b\n"
-" membar #StoreLoad | #StoreStore"
+" nop"
: /* no outputs */
: "r" (&(rw->lock))
: "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
" andn %%g1, %%g3, %%g7\n"
" casx [%0], %%g1, %%g7\n"
" cmp %%g1, %%g7\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%xcc, 1b\n"
-" membar #StoreLoad | #StoreStore"
+" nop"
: /* no outputs */
: "r" (&(rw->lock))
: "g3", "g1", "g7", "cc", "memory");
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S
index 7e6fdaebedb..8ee288dd0af 100644
--- a/arch/sparc64/lib/dec_and_lock.S
+++ b/arch/sparc64/lib/dec_and_lock.S
@@ -48,8 +48,9 @@ start_to_zero:
#endif
to_zero:
ldstub [%o1], %g3
+ membar #StoreLoad | #StoreStore
brnz,pn %g3, spin_on_lock
- membar #StoreLoad | #StoreStore
+ nop
loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
cmp %g2, %g7
@@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
nop
spin_on_lock:
ldub [%o1], %g3
+ membar #LoadLoad
brnz,pt %g3, spin_on_lock
- membar #LoadLoad
+ nop
ba,pt %xcc, to_zero
nop
nop
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S
index 174ff7b9164..75f0e6b951d 100644
--- a/arch/sparc64/lib/rwsem.S
+++ b/arch/sparc64/lib/rwsem.S
@@ -17,8 +17,9 @@ __down_read:
bne,pn %icc, 1b
add %g7, 1, %g7
cmp %g7, 0
+ membar #StoreLoad | #StoreStore
bl,pn %icc, 3f
- membar #StoreLoad | #StoreStore
+ nop
2:
retl
nop
@@ -57,8 +58,9 @@ __down_write:
cmp %g3, %g7
bne,pn %icc, 1b
cmp %g7, 0
+ membar #StoreLoad | #StoreStore
bne,pn %icc, 3f
- membar #StoreLoad | #StoreStore
+ nop
2: retl
nop
3:
@@ -97,8 +99,9 @@ __up_read:
cmp %g1, %g7
bne,pn %icc, 1b
cmp %g7, 0
+ membar #StoreLoad | #StoreStore
bl,pn %icc, 3f
- membar #StoreLoad | #StoreStore
+ nop
2: retl
nop
3: sethi %hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
bne,pn %icc, 1b
sub %g7, %g1, %g7
cmp %g7, 0
+ membar #StoreLoad | #StoreStore
bl,pn %icc, 3f
- membar #StoreLoad | #StoreStore
+ nop
2:
retl
nop
@@ -151,8 +155,9 @@ __downgrade_write:
bne,pn %icc, 1b
sub %g7, %g1, %g7
cmp %g7, 0
+ membar #StoreLoad | #StoreStore
bl,pn %icc, 3f
- membar #StoreLoad | #StoreStore
+ nop
2:
retl
nop
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 9c5222075da..8fc413cb6ac 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
"or %%g1, %0, %%g1\n\t"
"casx [%2], %%g7, %%g1\n\t"
"cmp %%g7, %%g1\n\t"
+ "membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%xcc, 1b\n\t"
- " membar #StoreLoad | #StoreStore"
+ " nop"
: /* no outputs */
: "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
: "g1", "g7");
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
" andn %%g7, %1, %%g1\n\t"
"casx [%2], %%g7, %%g1\n\t"
"cmp %%g7, %%g1\n\t"
+ "membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%xcc, 1b\n\t"
- " membar #StoreLoad | #StoreStore\n"
+ " nop\n"
"2:"
: /* no outputs */
: "r" (cpu), "r" (mask), "r" (&page->flags),
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 7a093432101..7a2431d3abc 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */
andn %o3, 1, %o3
stxa %g0, [%o3] ASI_IMMU_DEMAP
2: stxa %g0, [%o3] ASI_DMMU_DEMAP
+ membar #Sync
brnz,pt %o1, 1b
- membar #Sync
+ nop
stxa %g2, [%o4] ASI_DMMU
flush %g6
wrpr %g0, 0, %tl
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index bf2ae90ed3d..a1cc94f9598 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -55,8 +55,9 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
"add %%g1, %1, %%g7\n\t"
"cas [%2], %%g1, %%g7\n\t"
"cmp %%g1, %%g7\n\t"
+ "membar #StoreLoad | #StoreStore\n\t"
"bne,pn %%icc, 1b\n\t"
- " membar #StoreLoad | #StoreStore\n\t"
+ " nop\n\t"
"mov %%g7, %0\n\t"
: "=&r" (tmp)
: "0" (tmp), "r" (sem)
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index db7581bdb53..9cb93a5c2b4 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -52,12 +52,14 @@ static inline void _raw_spin_lock(spinlock_t *lock)
__asm__ __volatile__(
"1: ldstub [%1], %0\n"
+" membar #StoreLoad | #StoreStore\n"
" brnz,pn %0, 2f\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
" .subsection 2\n"
"2: ldub [%1], %0\n"
+" membar #LoadLoad\n"
" brnz,pt %0, 2b\n"
-" membar #LoadLoad\n"
+" nop\n"
" ba,a,pt %%xcc, 1b\n"
" .previous"
: "=&r" (tmp)
@@ -95,16 +97,18 @@ static inline void _raw_spin_lock_flags(spinlock_t *lock, unsigned long flags)
__asm__ __volatile__(
"1: ldstub [%2], %0\n"
-" brnz,pn %0, 2f\n"
" membar #StoreLoad | #StoreStore\n"
+" brnz,pn %0, 2f\n"
+" nop\n"
" .subsection 2\n"
"2: rdpr %%pil, %1\n"
" wrpr %3, %%pil\n"
"3: ldub [%2], %0\n"
-" brnz,pt %0, 3b\n"
" membar #LoadLoad\n"
+" brnz,pt %0, 3b\n"
+" nop\n"
" ba,pt %%xcc, 1b\n"
-" wrpr %1, %%pil\n"
+" wrpr %1, %%pil\n"
" .previous"
: "=&r" (tmp1), "=&r" (tmp2)
: "r"(lock), "r"(flags)
@@ -162,12 +166,14 @@ static void inline __read_lock(rwlock_t *lock)
"4: add %0, 1, %1\n"
" cas [%2], %0, %1\n"
" cmp %0, %1\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
" .subsection 2\n"
"2: ldsw [%2], %0\n"
+" membar #LoadLoad\n"
" brlz,pt %0, 2b\n"
-" membar #LoadLoad\n"
+" nop\n"
" ba,a,pt %%xcc, 4b\n"
" .previous"
: "=&r" (tmp1), "=&r" (tmp2)
@@ -204,12 +210,14 @@ static void inline __write_lock(rwlock_t *lock)
"4: or %0, %3, %1\n"
" cas [%2], %0, %1\n"
" cmp %0, %1\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
" .subsection 2\n"
"2: lduw [%2], %0\n"
+" membar #LoadLoad\n"
" brnz,pt %0, 2b\n"
-" membar #LoadLoad\n"
+" nop\n"
" ba,a,pt %%xcc, 4b\n"
" .previous"
: "=&r" (tmp1), "=&r" (tmp2)
@@ -240,8 +248,9 @@ static int inline __write_trylock(rwlock_t *lock)
" or %0, %4, %1\n"
" cas [%3], %0, %1\n"
" cmp %0, %1\n"
+" membar #StoreLoad | #StoreStore\n"
" bne,pn %%icc, 1b\n"
-" membar #StoreLoad | #StoreStore\n"
+" nop\n"
" mov 1, %2\n"
"2:"
: "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 9d7613eea81..1aa932773af 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -111,7 +111,6 @@ static __inline__ void spitfire_put_dcache_tag(unsigned long addr, unsigned long
"membar #Sync"
: /* No outputs */
: "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG));
- __asm__ __volatile__ ("membar #Sync" : : : "memory");
}
/* The instruction cache lines are flushed with this, but note that