diff options
Diffstat (limited to 'arch/mips/lib')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 61 | ||||
-rw-r--r-- | arch/mips/lib/memcpy-inatomic.S | 25 | ||||
-rw-r--r-- | arch/mips/lib/memcpy.S | 60 | ||||
-rw-r--r-- | arch/mips/lib/memset.S | 11 | ||||
-rw-r--r-- | arch/mips/lib/strncpy_user.S | 4 |
5 files changed, 130 insertions, 31 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index c0a77fe038b..957a82484e3 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -7,6 +7,7 @@ * * Copyright (C) 1998, 1999 Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki */ #include <linux/errno.h> #include <asm/asm.h> @@ -52,9 +53,12 @@ #define UNIT(unit) ((unit)*NBYTES) #define ADDC(sum,reg) \ + .set push; \ + .set noat; \ ADD sum, reg; \ sltu v1, sum, reg; \ - ADD sum, v1 + ADD sum, v1; \ + .set pop #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ LOAD _t0, (offset + UNIT(0))(src); \ @@ -178,8 +182,10 @@ move_128bytes: CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) LONG_SUBU t8, t8, 0x01 + .set reorder /* DADDI_WAR */ + PTR_ADDU src, src, 0x80 bnez t8, move_128bytes - PTR_ADDU src, src, 0x80 + .set noreorder 1: beqz t2, 1f @@ -208,8 +214,10 @@ end_words: lw t0, (src) LONG_SUBU t8, t8, 0x1 ADDC(sum, t0) + .set reorder /* DADDI_WAR */ + PTR_ADDU src, src, 0x4 bnez t8, end_words - PTR_ADDU src, src, 0x4 + .set noreorder /* unknown src alignment and < 8 bytes to go */ small_csumcpy: @@ -246,6 +254,8 @@ small_csumcpy: 1: ADDC(sum, t1) /* fold checksum */ + .set push + .set noat #ifdef USE_DOUBLE dsll32 v1, sum, 0 daddu sum, v1 @@ -266,6 +276,7 @@ small_csumcpy: srl sum, sum, 8 or sum, v1 andi sum, 0xffff + .set pop 1: .set reorder /* Add the passed partial csum. */ @@ -373,7 +384,11 @@ small_csumcpy: #define ADDRMASK (NBYTES-1) +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS .set noat +#else + .set at=v1 +#endif LEAF(__csum_partial_copy_user) PTR_ADDU AT, src, len /* See (1) above. */ @@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc) ADDC(sum, t6) EXC( STORE t7, UNIT(7)(dst), s_exc) ADDC(sum, t7) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 8*NBYTES bgez len, 1b - ADD dst, dst, 8*NBYTES + .set noreorder ADD len, 8*NBYTES # revert len (see above) /* @@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc) ADDC(sum, t2) EXC( STORE t3, UNIT(3)(dst), s_exc) ADDC(sum, t3) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES beqz len, done - ADD dst, dst, 4*NBYTES + .set noreorder less_than_4units: /* * rem = len % NBYTES @@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc) SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc) ADDC(sum, t0) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne rem, len, 1b - ADD dst, dst, NBYTES + .set noreorder /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) @@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc) ADDC(sum, t2) EXC( STORE t3, UNIT(3)(dst), s_exc) ADDC(sum, t3) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES bne len, rem, 1b - ADD dst, dst, 4*NBYTES + .set noreorder cleanup_src_unaligned: beqz len, done @@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc) ADDC(sum, t0) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne len, rem, 1b - ADD dst, dst, NBYTES + .set noreorder copy_bytes_checklen: beqz len, done @@ -631,6 +656,8 @@ copy_bytes_done: ADDC(sum, t2) done: /* fold checksum */ + .set push + .set noat #ifdef USE_DOUBLE dsll32 v1, sum, 0 daddu sum, v1 @@ -651,6 +678,7 @@ done: srl sum, sum, 8 or sum, v1 andi sum, 0xffff + .set pop 1: .set reorder ADDC(sum, psum) @@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc) SLLV t1, t1, t2 addu t2, SHIFT_INC ADDC(sum, t1) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 1 bne src, t0, 1b - ADD dst, dst, 1 + .set noreorder l_exc: LOAD t0, TI_TASK($28) nop @@ -697,12 +727,22 @@ l_exc: * Clear len bytes starting at dst. Can't call __bzero because it * might modify len. An inefficient loop for these rare times... */ + .set reorder /* DADDI_WAR */ + SUB src, len, 1 beqz len, done - SUB src, len, 1 + .set noreorder 1: sb zero, 0(dst) ADD dst, dst, 1 + .set push + .set noat +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS bnez src, 1b SUB src, src, 1 +#else + li v1, 1 + bnez src, 1b + SUB src, src, v1 +#endif li v1, -EFAULT b done sw v1, (errptr) @@ -712,4 +752,5 @@ s_exc: li v1, -EFAULT jr ra sw v1, (errptr) + .set pop END(__csum_partial_copy_user) diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S index 3a534b2baa0..d1b08f5d686 100644 --- a/arch/mips/lib/memcpy-inatomic.S +++ b/arch/mips/lib/memcpy-inatomic.S @@ -9,6 +9,7 @@ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 2002 Broadcom, Inc. * memcpy/copy_user author: Mark Vandevoorde + * Copyright (C) 2007 Maciej W. Rozycki * * Mnemonic names for arguments to memcpy/__copy_user */ @@ -175,7 +176,11 @@ .text .set noreorder +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS .set noat +#else + .set at=v1 +#endif /* * A combined memcpy/__copy_user @@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy) STORE t1, UNIT(1)(dst) STORE t2, UNIT(2)(dst) STORE t3, UNIT(3)(dst) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES beqz len, done - ADD dst, dst, 4*NBYTES + .set noreorder less_than_4units: /* * rem = len % NBYTES @@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc) ADD src, src, NBYTES SUB len, len, NBYTES STORE t0, 0(dst) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne rem, len, 1b - ADD dst, dst, NBYTES + .set noreorder /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) @@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) STORE t2, UNIT(2)(dst) STORE t3, UNIT(3)(dst) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES bne len, rem, 1b - ADD dst, dst, 4*NBYTES + .set noreorder cleanup_src_unaligned: beqz len, done @@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ADD src, src, NBYTES SUB len, len, NBYTES STORE t0, 0(dst) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne len, rem, 1b - ADD dst, dst, NBYTES + .set noreorder copy_bytes_checklen: beqz len, done @@ -424,8 +437,10 @@ l_exc_copy: EXC( lb t1, 0(src), l_exc) ADD src, src, 1 sb t1, 0(dst) # can't fault -- we're copy_from_user + .set reorder /* DADDI_WAR */ + ADD dst, dst, 1 bne src, t0, 1b - ADD dst, dst, 1 + .set noreorder l_exc: LOAD t0, TI_TASK($28) nop diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index a526c62cb76..aded7b15905 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -9,6 +9,7 @@ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 2002 Broadcom, Inc. * memcpy/copy_user author: Mark Vandevoorde + * Copyright (C) 2007 Maciej W. Rozycki * * Mnemonic names for arguments to memcpy/__copy_user */ @@ -175,7 +176,11 @@ .text .set noreorder +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS .set noat +#else + .set at=v1 +#endif /* * A combined memcpy/__copy_user @@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES beqz len, done - ADD dst, dst, 4*NBYTES + .set noreorder less_than_4units: /* * rem = len % NBYTES @@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne rem, len, 1b - ADD dst, dst, NBYTES + .set noreorder /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) @@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) + .set reorder /* DADDI_WAR */ + ADD dst, dst, 4*NBYTES bne len, rem, 1b - ADD dst, dst, 4*NBYTES + .set noreorder cleanup_src_unaligned: beqz len, done @@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ADD src, src, NBYTES SUB len, len, NBYTES EXC( STORE t0, 0(dst), s_exc_p1u) + .set reorder /* DADDI_WAR */ + ADD dst, dst, NBYTES bne len, rem, 1b - ADD dst, dst, NBYTES + .set noreorder copy_bytes_checklen: beqz len, done @@ -427,8 +440,10 @@ l_exc_copy: EXC( lb t1, 0(src), l_exc) ADD src, src, 1 sb t1, 0(dst) # can't fault -- we're copy_from_user + .set reorder /* DADDI_WAR */ + ADD dst, dst, 1 bne src, t0, 1b - ADD dst, dst, 1 + .set noreorder l_exc: LOAD t0, TI_TASK($28) nop @@ -446,20 +461,33 @@ l_exc: * Clear len bytes starting at dst. Can't call __bzero because it * might modify len. An inefficient loop for these rare times... */ + .set reorder /* DADDI_WAR */ + SUB src, len, 1 beqz len, done - SUB src, len, 1 + .set noreorder 1: sb zero, 0(dst) ADD dst, dst, 1 +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS bnez src, 1b SUB src, src, 1 +#else + .set push + .set noat + li v1, 1 + bnez src, 1b + SUB src, src, v1 + .set pop +#endif jr ra nop -#define SEXC(n) \ -s_exc_p ## n ## u: \ - jr ra; \ - ADD len, len, n*NBYTES +#define SEXC(n) \ + .set reorder; /* DADDI_WAR */ \ +s_exc_p ## n ## u: \ + ADD len, len, n*NBYTES; \ + jr ra; \ + .set noreorder SEXC(8) SEXC(7) @@ -471,8 +499,10 @@ SEXC(2) SEXC(1) s_exc_p1: + .set reorder /* DADDI_WAR */ + ADD len, len, 1 jr ra - ADD len, len, 1 + .set noreorder s_exc: jr ra nop @@ -502,8 +532,10 @@ r_end_bytes: SUB a2, a2, 0x1 sb t0, -1(a0) SUB a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + SUB a0, a0, 0x1 bnez a2, r_end_bytes - SUB a0, a0, 0x1 + .set noreorder r_out: jr ra @@ -514,8 +546,10 @@ r_end_bytes_up: SUB a2, a2, 0x1 sb t0, (a0) ADD a1, a1, 0x1 + .set reorder /* DADDI_WAR */ + ADD a0, a0, 0x1 bnez a2, r_end_bytes_up - ADD a0, a0, 0x1 + .set noreorder jr ra move a2, zero diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3f8b8b3d0b2..3bf38422342 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -5,6 +5,7 @@ * * Copyright (C) 1998, 1999, 2000 by Ralf Baechle * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki */ #include <asm/asm.h> #include <asm/asm-offsets.h> @@ -74,8 +75,16 @@ FEXPORT(__bzero) bnez t0, small_memset andi t0, a0, LONGMASK /* aligned? */ +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS beqz t0, 1f PTR_SUBU t0, LONGSIZE /* alignment in bytes */ +#else + .set noat + li AT, LONGSIZE + beqz t0, 1f + PTR_SUBU t0, AT /* alignment in bytes */ + .set at +#endif #ifdef __MIPSEB__ EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ @@ -106,7 +115,7 @@ memset_partial: .set noat LONG_SRL AT, t0, 1 PTR_SUBU t1, AT - .set noat + .set at #endif jr t1 PTR_ADDU a0, t0 /* dest ptr */ diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index d16c76fbfac..5c8fb9d6b7f 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -41,9 +41,9 @@ FEXPORT(__strncpy_from_user_nocheck_asm) beqz t0, 2f sb t0, (a0) PTR_ADDIU v0, 1 - bne v0, a2, 1b - PTR_ADDIU a0, 1 .set reorder + PTR_ADDIU a0, 1 + bne v0, a2, 1b 2: PTR_ADDU t0, a1, v0 xor t0, a1 bltz t0, fault |