diff options
author | Richard Woodruff <r-woodruff2@ti.com> | 2010-12-20 14:05:03 -0600 |
---|---|---|
committer | Kevin Hilman <khilman@deeprootsystems.com> | 2010-12-21 14:45:47 -0800 |
commit | 0bd40535365c318e331f5e872030a710d5746167 (patch) | |
tree | e08c1d72b3eac6e260f2d8841c465d154a03c2af /arch/arm | |
parent | 1cbbe37ac5c78fb59ce02f639d6c4f69b610cf5e (diff) |
OMAP3: PM: Update clean_l2 to use v7_flush_dcache_all
Analysis in TI kernel with ETM showed that using cache mapped flush
in kernel instead of SO mapped flush cost drops by 65% (3.39mS down
to 1.17mS) for clean_l2 which is used during sleep sequences.
Overall:
- speed up
- unfortunately there isn't a good alternative flush method today
- code reduction and less maintenance and potential bug in
unmaintained code
This also fixes the bug with the clean_l2 function usage.
Reported-by: Tony Lindgren <tony@atomide.com>
Cc: Kevin Hilman <khilman@deeprootsystems.com>
Cc: Tony Lindgren <tony@atomide.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
[nm@ti.com: ported rkw's proposal to 2.6.37-rc2]
Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Richard Woodruff <r-woodruff2@ti.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/mach-omap2/sleep34xx.S | 80 |
1 files changed, 14 insertions, 66 deletions
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index 2fb205a7f28..aa43da5176e 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -520,72 +520,18 @@ clean_caches: cmp r9, #1 /* Check whether L2 inval is required or not*/ bne skip_l2_inval clean_l2: - /* read clidr */ - mrc p15, 1, r0, c0, c0, 1 - /* extract loc from clidr */ - ands r3, r0, #0x7000000 - /* left align loc bit field */ - mov r3, r3, lsr #23 - /* if loc is 0, then no need to clean */ - beq finished - /* start clean at cache level 0 */ - mov r10, #0 -loop1: - /* work out 3x current cache level */ - add r2, r10, r10, lsr #1 - /* extract cache type bits from clidr*/ - mov r1, r0, lsr r2 - /* mask of the bits for current cache only */ - and r1, r1, #7 - /* see what cache we have at this level */ - cmp r1, #2 - /* skip if no cache, or just i-cache */ - blt skip - /* select current cache level in cssr */ - mcr p15, 2, r10, c0, c0, 0 - /* isb to sych the new cssr&csidr */ - isb - /* read the new csidr */ - mrc p15, 1, r1, c0, c0, 0 - /* extract the length of the cache lines */ - and r2, r1, #7 - /* add 4 (line length offset) */ - add r2, r2, #4 - ldr r4, assoc_mask - /* find maximum number on the way size */ - ands r4, r4, r1, lsr #3 - /* find bit position of way size increment */ - clz r5, r4 - ldr r7, numset_mask - /* extract max number of the index size*/ - ands r7, r7, r1, lsr #13 -loop2: - mov r9, r4 - /* create working copy of max way size*/ -loop3: - /* factor way and cache number into r11 */ - orr r11, r10, r9, lsl r5 - /* factor index number into r11 */ - orr r11, r11, r7, lsl r2 - /*clean & invalidate by set/way */ - mcr p15, 0, r11, c7, c10, 2 - /* decrement the way*/ - subs r9, r9, #1 - bge loop3 - /*decrement the index */ - subs r7, r7, #1 - bge loop2 -skip: - add r10, r10, #2 - /* increment cache number */ - cmp r3, r10 - bgt loop1 -finished: - /*swith back to cache level 0 */ - mov r10, #0 - /* select current cache level in cssr */ - mcr p15, 2, r10, c0, c0, 0 - isb + /* + * Jump out to kernel flush routine + * - reuse that code is better + * - it executes in a cached space so is faster than refetch per-block + * - should be faster and will change with kernel + * - 'might' have to copy address, load and jump to it + * - lr is used since we are running in SRAM currently. + */ + ldr r1, kernel_flush + mov lr, pc + bx r1 + skip_l2_inval: /* Data memory barrier and Data sync barrier */ mov r1, #0 @@ -668,5 +614,7 @@ cache_pred_disable_mask: .word 0xFFFFE7FB control_stat: .word CONTROL_STAT +kernel_flush: + .word v7_flush_dcache_all ENTRY(omap34xx_cpu_suspend_sz) .word . - omap34xx_cpu_suspend |