diff options
author | Paul Mundt <lethal@linux-sh.org> | 2007-11-08 18:51:33 +0900 |
---|---|---|
committer | Paul Mundt <lethal@linux-sh.org> | 2008-01-28 13:18:39 +0900 |
commit | dd730b8ff8adfdf8d39cd060acc7223f0d374d32 (patch) | |
tree | 24fd26e5c6e6ae72f921530693d0cf923962a895 /arch/sh64/lib/page_copy.S | |
parent | 2c6deb5ea414c26483578d41d8537d54dd2d282f (diff) |
sh: Move arch/sh64/lib to arch/sh/lib64.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh64/lib/page_copy.S')
-rw-r--r-- | arch/sh64/lib/page_copy.S | 91 |
1 files changed, 0 insertions, 91 deletions
diff --git a/arch/sh64/lib/page_copy.S b/arch/sh64/lib/page_copy.S deleted file mode 100644 index e159c3cd258..00000000000 --- a/arch/sh64/lib/page_copy.S +++ /dev/null @@ -1,91 +0,0 @@ -/* - Copyright 2003 Richard Curnow, SuperH (UK) Ltd. - - This file is subject to the terms and conditions of the GNU General Public - License. See the file "COPYING" in the main directory of this archive - for more details. - - Tight version of memcpy for the case of just copying a page. - Prefetch strategy empirically optimised against RTL simulations - of SH5-101 cut2 eval chip with Cayman board DDR memory. - - Parameters: - r2 : source effective address (start of page) - r3 : destination effective address (start of page) - - Always copies 4096 bytes. - - Points to review. - * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead. - It seems like the prefetch needs to be at least 4 lines ahead to get - the data into the cache in time, and the allocos contend with outstanding - prefetches for the same cache set, so it's better to have the numbers - different. - */ - - .section .text..SHmedia32,"ax" - .little - - .balign 8 - .global sh64_page_copy -sh64_page_copy: - - /* Copy 4096 bytes worth of data from r2 to r3. - Do prefetches 4 lines ahead. - Do alloco 2 lines ahead */ - - pta 1f, tr1 - pta 2f, tr2 - pta 3f, tr3 - ptabs r18, tr0 - -#if 0 - /* TAKum03020 */ - ld.q r2, 0x00, r63 - ld.q r2, 0x20, r63 - ld.q r2, 0x40, r63 - ld.q r2, 0x60, r63 -#endif - alloco r3, 0x00 - synco ! TAKum03020 - alloco r3, 0x20 - synco ! TAKum03020 - - movi 3968, r6 - add r3, r6, r6 - addi r6, 64, r7 - addi r7, 64, r8 - sub r2, r3, r60 - addi r60, 8, r61 - addi r61, 8, r62 - addi r62, 8, r23 - addi r60, 0x80, r22 - -/* Minimal code size. The extra branches inside the loop don't cost much - because they overlap with the time spent waiting for prefetches to - complete. */ -1: -#if 0 - /* TAKum03020 */ - bge/u r3, r6, tr2 ! skip prefetch for last 4 lines - ldx.q r3, r22, r63 ! 
prefetch 4 lines hence -#endif -2: - bge/u r3, r7, tr3 ! skip alloco for last 2 lines - alloco r3, 0x40 ! alloc destination line 2 lines ahead - synco ! TAKum03020 -3: - ldx.q r3, r60, r36 - ldx.q r3, r61, r37 - ldx.q r3, r62, r38 - ldx.q r3, r23, r39 - st.q r3, 0, r36 - st.q r3, 8, r37 - st.q r3, 16, r38 - st.q r3, 24, r39 - addi r3, 32, r3 - bgt/l r8, r3, tr1 - - blink tr0, r63 ! return - - |