diff options
author | Nicolas Pitre <nico@cam.org> | 2008-04-24 02:04:54 +0200 |
---|---|---|
committer | Lennert Buytenhek <buytenh@marvell.com> | 2008-06-22 22:44:58 +0200 |
commit | 6c386e58aadb90fb5d8b5be979e02d74f8be52fe (patch) | |
tree | 89ddc09277ad4191aa5bfd12db20302544ec2294 /arch/arm | |
parent | 79e90dd5aa95adfdc3117db8a559da3d0195ba58 (diff) |
[ARM] Feroceon: speed up flushing of the entire cache
Flushing the L1 D cache with a test/clean/invalidate loop is very
easy in software, but it is not the quickest way of doing it, as
there is a lot of overhead involved in re-scanning the cache from
the beginning every time we hit a dirty line.
This patch makes proc-feroceon.S use "clean+invalidate by set/way"
loops according to possible cache configuration of Feroceon CPUs
(either direct-mapped or 4-way set associative).
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/mm/proc-feroceon.S | 59 |
1 files changed, 48 insertions, 11 deletions
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 12b46d7b7f5..00eadb5995c 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -44,11 +44,31 @@ */ #define CACHE_DLINESIZE 32 + .bss + .align 3 +__cache_params_loc: + .space 8 + .text +__cache_params: + .word __cache_params_loc + /* * cpu_feroceon_proc_init() */ ENTRY(cpu_feroceon_proc_init) + mrc p15, 0, r0, c0, c0, 1 @ read cache type register + ldr r1, __cache_params + mov r2, #(16 << 5) + tst r0, #(1 << 16) @ get way + mov r0, r0, lsr #18 @ get cache size order + movne r3, #((4 - 1) << 30) @ 4-way + and r0, r0, #0xf + moveq r3, #0 @ 1-way + mov r2, r2, lsl r0 @ actual cache size + movne r2, r2, lsr #2 @ turned into # of sets + sub r2, r2, #(1 << 5) + stmia r1, {r2, r3} mov pc, lr /* @@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all) */ ENTRY(feroceon_flush_kern_cache_all) mov r2, #VM_EXEC - mov ip, #0 + __flush_whole_cache: -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b + ldr r1, __cache_params + ldmia r1, {r1, r3} +1: orr ip, r1, r3 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way + subs ip, ip, #(1 << 30) @ next way + bcs 2b + subs r1, r1, #(1 << 5) @ next set + bcs 1b + tst r2, #VM_EXEC + mov ip, #0 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr @@ -138,7 +166,6 @@ __flush_whole_cache: */ .align 5 ENTRY(feroceon_flush_user_cache_range) - mov ip, #0 sub r3, r1, r0 @ calculate total size cmp r3, #CACHE_DLIMIT bgt __flush_whole_cache @@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range) cmp r0, r1 blo 1b tst r2, #VM_EXEC + mov ip, #0 mcrne p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr @@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area) .align 5 ENTRY(cpu_feroceon_switch_mm) #ifdef CONFIG_MMU - mov ip, #0 -@ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB + /* + * Note: we wish to call __flush_whole_cache but we need to preserve + * lr to do so. The only way without touching main memory is to + * use r2 which is normally used to test the VM_EXEC flag, and + * compensate locally for the skipped ops if it is not set. + */ + mov r2, lr @ abuse r2 to preserve lr + bl __flush_whole_cache + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already + tst r2, #VM_EXEC + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcreq p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif + mov pc, r2 +#else mov pc, lr +#endif /* * cpu_feroceon_set_pte_ext(ptep, pte, ext) |