From f91e2c3bd427239c198351f44814dd39db91afe0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 7 Dec 2010 16:52:04 +0100
Subject: ARM: 6527/1: Use CTR instead of CCSIDR for the D-cache line size on
 ARMv7

The current implementation of the dcache_line_size macro reads the L1
cache size from the CCSIDR register. This, however, is not guaranteed to
be the smallest cache line in the cache hierarchy. The patch changes to
the macro to use the more architecturally correct CTR register.

Reported-by: Kevin Sapp <ksapp@quicinc.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-macros.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/arm/mm/proc-macros.S')

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 7d63beaf974..321555b894d 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -61,14 +61,14 @@
 	.endm
 
 /*
- * cache_line_size - get the cache line size from the CSIDR register
- * (available on ARMv7+). It assumes that the CSSR register was configured
- * to access the L1 data cache CSIDR.
+ * dcache_line_size - get the minimum D-cache line size from the CTR register
+ * on ARMv7.
  */
 	.macro	dcache_line_size, reg, tmp
-	mrc	p15, 1, \tmp, c0, c0, 0		@ read CSIDR
-	and	\tmp, \tmp, #7			@ cache line size encoding
-	mov	\reg, #16			@ size offset
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	lsr	\tmp, \tmp, #16
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm
 
-- 
cgit v1.2.3-70-g09d2


From da30e0ac0f9a521f0cfec8145ddd1ad131f66d61 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 7 Dec 2010 16:56:29 +0100
Subject: ARM: 6528/1: Use CTR for the I-cache line size on ARMv7

The current implementation of the v7_coherent_*_range function assumes
that the D and I cache lines have the same size, which is incorrect
architecturally. This patch adds the icache_line_size macro which reads
the CTR register. The main loop in v7_coherent_*_range is split in two
independent loops or the D and I caches. This also has the performance
advantage that the DSB is moved outside the main loop.

Reported-by: Kevin Sapp <ksapp@quicinc.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-v7.S    | 27 +++++++++++++++++----------
 arch/arm/mm/proc-macros.S | 10 ++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'arch/arm/mm/proc-macros.S')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index a3ebf7a4f49..6136e68ce95 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -173,15 +173,22 @@ ENTRY(v7_coherent_user_range)
  UNWIND(.fnstart		)
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
-	bic	r0, r0, r3
+	bic	r12, r0, r3
 1:
- USER(	mcr	p15, 0, r0, c7, c11, 1	)	@ clean D line to the point of unification
+ USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
 	dsb
- USER(	mcr	p15, 0, r0, c7, c5, 1	)	@ invalidate I line
-	add	r0, r0, r2
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
 2:
-	cmp	r0, r1
-	blo	1b
+ USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+3:
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
@@ -194,10 +201,10 @@ ENTRY(v7_coherent_user_range)
  * isn't mapped, just try the next page.
  */
 9001:
-	mov	r0, r0, lsr #12
-	mov	r0, r0, lsl #12
-	add	r0, r0, #4096
-	b	2b
+	mov	r12, r12, lsr #12
+	mov	r12, r12, lsl #12
+	add	r12, r12, #4096
+	b	3b
  UNWIND(.fnend		)
 ENDPROC(v7_coherent_kern_range)
 ENDPROC(v7_coherent_user_range)
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 321555b894d..b795afd0a2c 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -72,6 +72,16 @@
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm
 
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register
+ * on ARMv7.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
+	mov	\reg, \reg, lsl \tmp		@ actual cache line size
+	.endm
 
 /*
  * Sanity check the PTE configuration for the code below - which makes
-- 
cgit v1.2.3-70-g09d2