From f91e2c3bd427239c198351f44814dd39db91afe0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Dec 2010 16:52:04 +0100 Subject: ARM: 6527/1: Use CTR instead of CCSIDR for the D-cache line size on ARMv7 The current implementation of the dcache_line_size macro reads the L1 cache size from the CCSIDR register. This, however, is not guaranteed to be the smallest cache line in the cache hierarchy. The patch changes to the macro to use the more architecturally correct CTR register. Reported-by: Kevin Sapp Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mm/proc-macros.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm/mm/proc-macros.S') diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 7d63beaf974..321555b894d 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -61,14 +61,14 @@ .endm /* - * cache_line_size - get the cache line size from the CSIDR register - * (available on ARMv7+). It assumes that the CSSR register was configured - * to access the L1 data cache CSIDR. + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. */ .macro dcache_line_size, reg, tmp - mrc p15, 1, \tmp, c0, c0, 0 @ read CSIDR - and \tmp, \tmp, #7 @ cache line size encoding - mov \reg, #16 @ size offset + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word mov \reg, \reg, lsl \tmp @ actual cache line size .endm -- cgit v1.2.3-70-g09d2 From da30e0ac0f9a521f0cfec8145ddd1ad131f66d61 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Dec 2010 16:56:29 +0100 Subject: ARM: 6528/1: Use CTR for the I-cache line size on ARMv7 The current implementation of the v7_coherent_*_range function assumes that the D and I cache lines have the same size, which is incorrect architecturally. This patch adds the icache_line_size macro which reads the CTR register. The main loop in v7_coherent_*_range is split in two independent loops or the D and I caches. This also has the performance advantage that the DSB is moved outside the main loop. Reported-by: Kevin Sapp Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mm/cache-v7.S | 27 +++++++++++++++++---------- arch/arm/mm/proc-macros.S | 10 ++++++++++ 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'arch/arm/mm/proc-macros.S') diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index a3ebf7a4f49..6136e68ce95 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -173,15 +173,22 @@ ENTRY(v7_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 - bic r0, r0, r3 + bic r12, r0, r3 1: - USER( mcr p15, 0, r0, c7, c11, 1 ) @ clean D line to the point of unification + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b dsb - USER( mcr p15, 0, r0, c7, c5, 1 ) @ invalidate I line - add r0, r0, r2 + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 2: - cmp r0, r1 - blo 1b + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b +3: mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB @@ -194,10 +201,10 @@ ENTRY(v7_coherent_user_range) * isn't mapped, just try the next page. */ 9001: - mov r0, r0, lsr #12 - mov r0, r0, lsl #12 - add r0, r0, #4096 - b 2b + mov r12, r12, lsr #12 + mov r12, r12, lsl #12 + add r12, r12, #4096 + b 3b UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 321555b894d..b795afd0a2c 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -72,6 +72,16 @@ mov \reg, \reg, lsl \tmp @ actual cache line size .endm +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm /* * Sanity check the PTE configuration for the code below - which makes -- cgit v1.2.3-70-g09d2