24 files changed, 1491 insertions, 76 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d490f3773c0..20979564e7e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -186,6 +186,24 @@ config CPU_ARM926T
 	  Say Y if you want support for the ARM926T processor.
 	  Otherwise, say N.
 
+# FA526
+config CPU_FA526
+	bool
+	select CPU_32v4
+	select CPU_ABRT_EV4
+	select CPU_PABRT_NOIFAR
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_CACHE_FA
+	select CPU_COPY_FA if MMU
+	select CPU_TLB_FA if MMU
+	help
+	  The FA526 is a version of the ARMv4 compatible processor with
+	  Branch Target Buffer, Unified TLB and cache line size 16.
+
+	  Say Y if you want support for the FA526 processor.
+	  Otherwise, say N.
+
 # ARM940T
 config CPU_ARM940T
 	bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -340,6 +358,17 @@ config CPU_XSC3
 	select CPU_TLB_V4WBI if MMU
 	select IO_36
 
+# Marvell PJ1 (Mohawk)
+config CPU_MOHAWK
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_PABRT_NOIFAR
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_TLB_V4WBI if MMU
+	select CPU_COPY_V4WB if MMU
+
 # Feroceon
 config CPU_FEROCEON
 	bool
@@ -484,6 +513,9 @@ config CPU_CACHE_VIVT
 config CPU_CACHE_VIPT
 	bool
 
+config CPU_CACHE_FA
+	bool
+
 if MMU
 # The copy-page model
 config CPU_COPY_V3
@@ -498,6 +530,9 @@ config CPU_COPY_V4WB
 config CPU_COPY_FEROCEON
 	bool
 
+config CPU_COPY_FA
+	bool
+
 config CPU_COPY_V6
 	bool
 
@@ -528,6 +563,13 @@ config CPU_TLB_FEROCEON
 	help
 	  Feroceon TLB (v4wbi with non-outer-cachable page table walks).
 
+config CPU_TLB_FA
+	bool
+	help
+	  Faraday ARM FA526 architecture, unified TLB with writeback cache
+	  and invalidate instruction cache entry. Branch target buffer is
+	  also supported.
+
 config CPU_TLB_V6
 	bool
 
@@ -569,7 +611,7 @@ comment "Processor Features"
 
 config ARM_THUMB
 	bool "Support Thumb user binaries"
-	depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7 || CPU_FEROCEON
+	depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON
 	default y
 	help
 	  Say Y if you want to include kernel support for running user space
@@ -638,7 +680,7 @@ config CPU_DCACHE_SIZE
 
 config CPU_DCACHE_WRITETHROUGH
 	bool "Force write through D-cache"
-	depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+	depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
 	default y if CPU_ARM925T
 	help
 	  Say Y here to use the data cache in writethrough mode. Unless you
@@ -653,7 +695,7 @@ config CPU_CACHE_ROUND_ROBIN
 
 config CPU_BPREDICT_DISABLE
 	bool "Disable branch prediction"
-	depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+	depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
@@ -704,7 +746,8 @@ config CACHE_FEROCEON_L2_WRITETHROUGH
 
 config CACHE_L2X0
 	bool "Enable the L2x0 outer cache controller"
-	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || REALVIEW_EB_A9MP
+	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \
+		   REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31
 	default y
 	select OUTER_CACHE
 	help
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 480f78a3611..63e3f6dd0e2 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES)		+= proc-syms.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
 
 obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
 obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
@@ -32,6 +33,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT)	+= cache-v4wt.o
 obj-$(CONFIG_CPU_CACHE_V4WB)	+= cache-v4wb.o
 obj-$(CONFIG_CPU_CACHE_V6)	+= cache-v6.o
 obj-$(CONFIG_CPU_CACHE_V7)	+= cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA)	+= cache-fa.o
 
 obj-$(CONFIG_CPU_COPY_V3)	+= copypage-v3.o
 obj-$(CONFIG_CPU_COPY_V4WT)	+= copypage-v4wt.o
@@ -41,6 +43,7 @@ obj-$(CONFIG_CPU_COPY_V6)	+= copypage-v6.o context.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 obj-$(CONFIG_CPU_XSC3)		+= copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA)	+= copypage-fa.o
 
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
@@ -49,6 +52,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI)	+= tlb-v4wbi.o
 obj-$(CONFIG_CPU_TLB_FEROCEON)	+= tlb-v4wbi.o	# reuse v4wbi TLB functions
 obj-$(CONFIG_CPU_TLB_V6)	+= tlb-v6.o
 obj-$(CONFIG_CPU_TLB_V7)	+= tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA)	+= tlb-fa.o
 
 obj-$(CONFIG_CPU_ARM610)	+= proc-arm6_7.o
 obj-$(CONFIG_CPU_ARM710)	+= proc-arm6_7.o
@@ -62,6 +66,7 @@ obj-$(CONFIG_CPU_ARM925T)	+= proc-arm925.o
 obj-$(CONFIG_CPU_ARM926T)	+= proc-arm926.o
 obj-$(CONFIG_CPU_ARM940T)	+= proc-arm940.o
 obj-$(CONFIG_CPU_ARM946E)	+= proc-arm946.o
+obj-$(CONFIG_CPU_FA526)		+= proc-fa526.o
 obj-$(CONFIG_CPU_ARM1020)	+= proc-arm1020.o
 obj-$(CONFIG_CPU_ARM1020E)	+= proc-arm1020e.o
 obj-$(CONFIG_CPU_ARM1022)	+= proc-arm1022.o
@@ -70,6 +75,7 @@ obj-$(CONFIG_CPU_SA110)		+= proc-sa110.o
 obj-$(CONFIG_CPU_SA1100)	+= proc-sa1100.o
 obj-$(CONFIG_CPU_XSCALE)	+= proc-xscale.o
 obj-$(CONFIG_CPU_XSC3)		+= proc-xsc3.o
+obj-$(CONFIG_CPU_MOHAWK)	+= proc-mohawk.o
 obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
 obj-$(CONFIG_CPU_V7)		+= proc-v7.o
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 94077fbd96b..6f7e70907e4 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -29,10 +29,10 @@ ENTRY(v6_early_abort)
 	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
 	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
 /*
- * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR.
+ * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR (erratum 326103).
  * The test below covers all the write situations, including Java bytecodes
  */
-	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR
 	tst	r3, #PSR_J_BIT			@ Java?
 	movne	pc, lr
 	do_thumb_abort
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 00000000000..b63a8f7b95c
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,220 @@
+/*
+ *  linux/arch/arm/mm/cache-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Processors: FA520 FA526 FA626	
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE	8192
+#else
+#define CACHE_DSIZE	16384 
+#endif 
+
+/* FIXME: put optimal value here. Current one is just estimation */
+#define CACHE_DLIMIT	(CACHE_DSIZE * 2)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(fa_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+	mov	ip, #0
+	mov	r2, #VM_EXEC
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean/invalidate D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (inclusive, page aligned)
+ *	- end	- end address (exclusive, page aligned)
+ *	- flags	- vma_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT		@ total size >= limit?
+	bhs	__flush_whole_cache		@ flush whole D cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I line
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+	/* fall through */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+/*
+ *	flush_kern_dcache_page(kaddr)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- kaddr   - kernel address (guaranteed to be page aligned)
+ */
+ENTRY(fa_flush_kern_dcache_page)
+	add	r1, r0, #PAGE_SZ
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_dma_inv_range)
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	bic	r1, r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean (write back) the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_dma_clean_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	fa_cache_fns, #object
+ENTRY(fa_cache_fns)
+	.long	fa_flush_kern_cache_all
+	.long	fa_flush_user_cache_all
+	.long	fa_flush_user_cache_range
+	.long	fa_coherent_kern_range
+	.long	fa_coherent_user_range
+	.long	fa_flush_kern_dcache_page
+	.long	fa_dma_inv_range
+	.long	fa_dma_clean_range
+	.long	fa_dma_flush_range
+	.size	fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 80cd207cbae..6e77c042d8e 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -14,8 +14,12 @@
 
 #include <linux/init.h>
 #include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include <plat/cache-feroceon-l2.h>
-
+#include "mm.h"
 
 /*
  * Low-level cache maintenance operations.
@@ -34,14 +38,36 @@
  * The range operations require two successive cp15 writes, in
  * between which we don't want to be preempted.
  */
+
+static inline unsigned long l2_start_va(unsigned long paddr)
+{
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Let's do our own fixmap stuff in a minimal way here.
+	 * Because range ops can't be done on physical addresses,
+	 * we simply install a virtual mapping for it only for the
+	 * TLB lookup to occur, hence no need to flush the untouched
+	 * memory mapping.  This is protected with the disabling of
+	 * interrupts by the caller.
+	 */
+	unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+	unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
+	local_flush_tlb_kernel_page(vaddr);
+	return vaddr + (paddr & ~PAGE_MASK);
+#else
+	return __phys_to_virt(paddr);
+#endif
+}
+
 static inline void l2_clean_pa(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
 }
 
-static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
+	unsigned long va_start, va_end, flags;
 
 	/*
 	 * Make sure 'start' and 'end' reference the same page, as
@@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
+	va_start = l2_start_va(start);
+	va_end = va_start + (end - start);
 	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
 		"mcr p15, 1, %1, c15, c9, 5"
-		: : "r" (start), "r" (end));
+		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
 }
 
-static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
-{
-	l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end));
-}
-
 static inline void l2_clean_inv_pa(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
@@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr)
 	__asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
 }
 
-static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
 {
-	unsigned long flags;
+	unsigned long va_start, va_end, flags;
 
 	/*
 	 * Make sure 'start' and 'end' reference the same page, as
@@ -84,18 +107,19 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
 	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
+	va_start = l2_start_va(start);
+	va_end = va_start + (end - start);
 	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
 		"mcr p15, 1, %1, c15, c11, 5"
-		: : "r" (start), "r" (end));
+		: : "r" (va_start), "r" (va_end));
 	raw_local_irq_restore(flags);
 }
 
-static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
+static inline void l2_inv_all(void)
 {
-	l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end));
+	__asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
 }
 
-
 /*
  * Linux primitives.
  *
@@ -234,9 +258,7 @@ static void __init enable_dcache(void)
 
 static void __init __invalidate_icache(void)
 {
-	int dummy;
-
-	__asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0" : "=r" (dummy));
+	__asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
 }
 
 static int __init invalidate_and_disable_icache(void)
@@ -301,6 +323,7 @@ static void __init enable_l2(void)
 
 		d = flush_and_disable_dcache();
 		i = invalidate_and_disable_icache();
+		l2_inv_all();
 		write_extra_features(u | 0x00400000);
 		if (i)
 			enable_icache();
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index 464de893a98..5d180cb0bd9 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -17,12 +17,14 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-
 #include <asm/system.h>
 #include <asm/cputype.h>
 #include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
 
 #define CR_L2	(1 << 26)
 
@@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr)
 	__asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr));
 }
 
-static inline void xsc3_l2_clean_pa(unsigned long addr)
-{
-	xsc3_l2_clean_mva(__phys_to_virt(addr));
-}
-
 static inline void xsc3_l2_inv_mva(unsigned long addr)
 {
 	__asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr));
 }
 
-static inline void xsc3_l2_inv_pa(unsigned long addr)
-{
-	xsc3_l2_inv_mva(__phys_to_virt(addr));
-}
-
 static inline void xsc3_l2_inv_all(void)
 {
 	unsigned long l2ctype, set_way;
@@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void)
 	dsb();
 }
 
+#ifdef CONFIG_HIGHMEM
+#define l2_map_save_flags(x)		raw_local_save_flags(x)
+#define l2_map_restore_flags(x)		raw_local_irq_restore(x)
+#else
+#define l2_map_save_flags(x)		((x) = 0)
+#define l2_map_restore_flags(x)		((void)(x))
+#endif
+
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
+				      unsigned long flags)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long va = prev_va & PAGE_MASK;
+	unsigned long pa_offset = pa << (32 - PAGE_SHIFT);
+	if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) {
+		/*
+		 * Switching to a new page.  Because cache ops are
+		 * using virtual addresses only, we must put a mapping
+		 * in place for it.  We also enable interrupts for a
+		 * short while and disable them again to protect this
+		 * mapping.
+		 */
+		unsigned long idx;
+		raw_local_irq_restore(flags);
+		idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
+		va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+		raw_local_irq_restore(flags | PSR_I_BIT);
+		set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
+		local_flush_tlb_kernel_page(va);
+	}
+	return va + (pa_offset >> (32 - PAGE_SHIFT));
+#else
+	return __phys_to_virt(pa);
+#endif
+}
+
 static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_inv_all();
 		return;
 	}
 
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	/*
 	 * Clean and invalidate partial first cache line.
 	 */
 	if (start & (CACHE_LINE_SIZE - 1)) {
-		xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1));
-		xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
 	}
 
 	/*
-	 * Clean and invalidate partial last cache line.
+	 * Invalidate all full cache lines between 'start' and 'end'.
 	 */
-	if (start < end && (end & (CACHE_LINE_SIZE - 1))) {
-		xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1));
-		xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
-		end &= ~(CACHE_LINE_SIZE - 1);
+	while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_inv_mva(vaddr);
+		start += CACHE_LINE_SIZE;
 	}
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * Clean and invalidate partial last cache line.
 	 */
-	while (start < end) {
-		xsc3_l2_inv_pa(start);
-		start += CACHE_LINE_SIZE;
+	if (start < end) {
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
 static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		xsc3_l2_clean_pa(start);
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
@@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void)
 
 static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
+	unsigned long vaddr, flags;
+
 	if (start == 0 && end == -1ul) {
 		xsc3_l2_flush_all();
 		return;
 	}
 
+	vaddr = -1;  /* to force the first mapping */
+	l2_map_save_flags(flags);
+
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
-		xsc3_l2_clean_pa(start);
-		xsc3_l2_inv_pa(start);
+		vaddr = l2_map_va(start, vaddr, flags);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
 		start += CACHE_LINE_SIZE;
 	}
 
+	l2_map_restore_flags(flags);
+
 	dsb();
 }
 
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 00000000000..b2a6008b011
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ *  linux/arch/arm/lib/copypage-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r2, %0				@ 1\n\
+1:	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	subs	r2, r2, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r2, c7, c10, 4		@ 1   drain WB\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to, KM_USER0);
+	kfrom = kmap_atomic(from, KM_USER1);
+	fa_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom, KM_USER1);
+	kunmap_atomic(kto, KM_USER0);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page, KM_USER0);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ 1   drain WB"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr, KM_USER0);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+	.cpu_clear_user_highpage = fa_clear_user_highpage,
+	.cpu_copy_user_highpage	= fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c
index c3ba6a94da0..70997d5bee2 100644
--- a/arch/arm/mm/copypage-feroceon.c
+++ b/arch/arm/mm/copypage-feroceon.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 
-static void __attribute__((naked))
+static void __naked
 feroceon_copy_user_page(void *kto, const void *kfrom)
 {
 	asm("\
diff --git a/arch/arm/mm/copypage-v3.c b/arch/arm/mm/copypage-v3.c
index 70ed96c8af8..de9c06854ad 100644
--- a/arch/arm/mm/copypage-v3.c
+++ b/arch/arm/mm/copypage-v3.c
@@ -15,7 +15,7 @@
  *
  * FIXME: do we need to handle cache stuff...
  */
-static void __attribute__((naked))
+static void __naked
 v3_copy_user_page(void *kto, const void *kfrom)
 {
 	asm("\n\
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index 1601698b980..7370a7142b0 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(minicache_lock);
  * instruction.  If your processor does not supply this, you have to write your
  * own copy_user_highpage that does the right thing.
  */
-static void __attribute__((naked))
+static void __naked
 mc_copy_user_page(void *from, void *to)
 {
 	asm volatile(
diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c
index 3ec93dab765..9ab09841422 100644
--- a/arch/arm/mm/copypage-v4wb.c
+++ b/arch/arm/mm/copypage-v4wb.c
@@ -22,7 +22,7 @@
  * instruction.  If your processor does not supply this, you have to write your
  * own copy_user_highpage that does the right thing.
  */
-static void __attribute__((naked))
+static void __naked
 v4wb_copy_user_page(void *kto, const void *kfrom)
 {
 	asm("\
diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c
index 0f1188efae4..300efafd664 100644
--- a/arch/arm/mm/copypage-v4wt.c
+++ b/arch/arm/mm/copypage-v4wt.c
@@ -20,7 +20,7 @@
  * dirty data in the cache.  However, we do have to ensure that
  * subsequent reads are up to date.
  */
-static void __attribute__((naked))
+static void __naked
 v4wt_copy_user_page(void *kto, const void *kfrom)
 {
 	asm("\
diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c
index 39a994542ca..bc4525f5ab2 100644
--- a/arch/arm/mm/copypage-xsc3.c
+++ b/arch/arm/mm/copypage-xsc3.c
@@ -29,7 +29,7 @@
  * if we eventually end up using our copied page.
  *
  */
-static void __attribute__((naked))
+static void __naked
 xsc3_mc_copy_user_page(void *kto, const void *kfrom)
 {
 	asm("\
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index d18f2397ee2..76824d3e966 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(minicache_lock);
  * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
  * and merged as appropriate.
  */
-static void __attribute__((naked))
+static void __naked
 mc_copy_user_page(void *from, void *to)
 {
 	/*
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 310e479309e..510c179b0ac 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 
 #include <asm/memory.h>
+#include <asm/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/sizes.h>
@@ -490,29 +491,101 @@ core_initcall(consistent_init);
  */
 void dma_cache_maint(const void *start, size_t size, int direction)
 {
-	const void *end = start + size;
+	void (*inner_op)(const void *, const void *);
+	void (*outer_op)(unsigned long, unsigned long);
 
-	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1));
+	BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(start + size - 1));
 
 	switch (direction) {
 	case DMA_FROM_DEVICE:		/* invalidate only */
-		dmac_inv_range(start, end);
-		outer_inv_range(__pa(start), __pa(end));
+		inner_op = dmac_inv_range;
+		outer_op = outer_inv_range;
 		break;
 	case DMA_TO_DEVICE:		/* writeback only */
-		dmac_clean_range(start, end);
-		outer_clean_range(__pa(start), __pa(end));
+		inner_op = dmac_clean_range;
+		outer_op = outer_clean_range;
 		break;
 	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
-		dmac_flush_range(start, end);
-		outer_flush_range(__pa(start), __pa(end));
+		inner_op = dmac_flush_range;
+		outer_op = outer_flush_range;
 		break;
 	default:
 		BUG();
 	}
+
+	inner_op(start, start + size);
+	outer_op(__pa(start), __pa(start) + size);
 }
 EXPORT_SYMBOL(dma_cache_maint);
 
+static void dma_cache_maint_contiguous(struct page *page, unsigned long offset,
+				       size_t size, int direction)
+{
+	void *vaddr;
+	unsigned long paddr;
+	void (*inner_op)(const void *, const void *);
+	void (*outer_op)(unsigned long, unsigned long);
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		inner_op = dmac_inv_range;
+		outer_op = outer_inv_range;
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		inner_op = dmac_clean_range;
+		outer_op = outer_clean_range;
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		inner_op = dmac_flush_range;
+		outer_op = outer_flush_range;
+		break;
+	default:
+		BUG();
+	}
+
+	if (!PageHighMem(page)) {
+		vaddr = page_address(page) + offset;
+		inner_op(vaddr, vaddr + size);
+	} else {
+		vaddr = kmap_high_get(page);
+		if (vaddr) {
+			vaddr += offset;
+			inner_op(vaddr, vaddr + size);
+			kunmap_high(page);
+		}
+	}
+
+	paddr = page_to_phys(page) + offset;
+	outer_op(paddr, paddr + size);
+}
+
+void dma_cache_maint_page(struct page *page, unsigned long offset,
+			  size_t size, int dir)
+{
+	/*
+	 * A single sg entry may refer to multiple physically contiguous
+	 * pages.  But we still need to process highmem pages individually.
+	 * If highmem is not configured then the bulk of this loop gets
+	 * optimized out.
+	 */
+	size_t left = size;
+	do {
+		size_t len = left;
+		if (PageHighMem(page) && len + offset > PAGE_SIZE) {
+			if (offset >= PAGE_SIZE) {
+				page += offset / PAGE_SIZE;
+				offset %= PAGE_SIZE;
+			}
+			len = PAGE_SIZE - offset;
+		}
+		dma_cache_maint_contiguous(page, offset, len, dir);
+		offset = 0;
+		page++;
+		left -= len;
+	} while (left);
+}
+EXPORT_SYMBOL(dma_cache_maint_page);
+
 /**
  * dma_map_sg - map a set of SG buffers for streaming mode DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -610,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 			continue;
 
 		if (!arch_is_coherent())
-			dma_cache_maint(sg_virt(s), s->length, dir);
+			dma_cache_maint_page(sg_page(s), s->offset,
+					     s->length, dir);
 	}
 }
 EXPORT_SYMBOL(dma_sync_sg_for_device);
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 0fa9bf388f0..4e283481cee 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 #ifndef CONFIG_SMP
-	if (mapping && !mapping_mapped(mapping))
+	if (!PageHighMem(page) && mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
 #endif
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
new file mode 100644
index 00000000000..a34954d9df7
--- /dev/null
+++ b/arch/arm/mm/highmem.c
@@ -0,0 +1,116 @@
+/*
+ * arch/arm/mm/highmem.c -- ARM highmem support
+ *
+ * Author:	Nicolas Pitre
+ * Created:	september 8, 2008
+ * Copyright:	Marvell Semiconductors Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
+
+void *kmap(struct page *page)
+{
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+	unsigned int idx;
+	unsigned long vaddr;
+
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	/*
+	 * With debugging enabled, kunmap_atomic forces that entry to 0.
+	 * Make sure it was indeed properly unmapped.
+	 */
+	BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+	set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0);
+	/*
+	 * When debugging is off, kunmap_atomic leaves the previous mapping
+	 * in place, so this TLB flush ensures the TLB is updated with the
+	 * new mapping.
+	 */
+	local_flush_tlb_kernel_page(vaddr);
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	unsigned int idx = type + KM_TYPE_NR * smp_processor_id();
+
+	if (kvaddr >= (void *)FIXADDR_START) {
+		__cpuc_flush_dcache_page((void *)vaddr);
+#ifdef CONFIG_DEBUG_HIGHMEM
+		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+		set_pte_ext(TOP_PTE(vaddr), __pte(0), 0);
+		local_flush_tlb_kernel_page(vaddr);
+#else
+		(void) idx;  /* to kill a warning */
+#endif
+	}
+	pagefault_enable();
+}
+EXPORT_SYMBOL(kunmap_atomic);
+
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+	unsigned int idx;
+	unsigned long vaddr;
+
+	pagefault_disable();
+
+	idx = type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*(TOP_PTE(vaddr))));
+#endif
+	set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0);
+	local_flush_tlb_kernel_page(vaddr);
+
+	return (void *)vaddr;
+}
+
+struct page *kmap_atomic_to_page(const void *ptr)
+{
+	unsigned long vaddr = (unsigned long)ptr;
+	pte_t *pte;
+
+	if (vaddr < FIXADDR_START)
+		return virt_to_page(ptr);
+
+	pte = TOP_PTE(vaddr);
+	return pte_page(*pte);
+}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 34df4d9d03a..8277802ec85 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -15,6 +15,7 @@
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
+#include <linux/highmem.h>
 
 #include <asm/mach-types.h>
 #include <asm/sections.h>
@@ -382,7 +383,7 @@ void __init bootmem_init(void)
 	for_each_node(node)
 		bootmem_free_node(node, mi);
 
-	high_memory = __va(memend_pfn << PAGE_SHIFT);
+	high_memory = __va((memend_pfn << PAGE_SHIFT) - 1) + 1;
 
 	/*
 	 * This doesn't seem to be used by the Linux memory manager any
@@ -485,7 +486,7 @@ void __init mem_init(void)
 	int i, node;
 
 #ifndef CONFIG_DISCONTIGMEM
-	max_mapnr   = virt_to_page(high_memory) - mem_map;
+	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
 #endif
 
 	/* this will put all unused low memory onto the freelists */
@@ -504,6 +505,19 @@ void __init mem_init(void)
 				    __phys_to_pfn(__pa(swapper_pg_dir)), NULL);
 #endif
 
+#ifdef CONFIG_HIGHMEM
+	/* set highmem page free */
+	for_each_online_node(node) {
+		for_each_nodebank (i, &meminfo, node) {
+			unsigned long start = bank_pfn_start(&meminfo.bank[i]);
+			unsigned long end = bank_pfn_end(&meminfo.bank[i]);
+			if (start >= max_low_pfn + PHYS_PFN_OFFSET)
+				totalhigh_pages += free_area(start, end, NULL);
+		}
+	}
+	totalram_pages += totalhigh_pages;
+#endif
+
 	/*
 	 * Since our memory may not be contiguous, calculate the
 	 * real number of pages we have in this system
@@ -521,9 +535,10 @@ void __init mem_init(void)
 	initsize = __init_end - __init_begin;
 
 	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
-		"%dK data, %dK init)\n",
+		"%dK data, %dK init, %luK highmem)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
-		codesize >> 10, datasize >> 10, initsize >> 10);
+		codesize >> 10, datasize >> 10, initsize >> 10,
+		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
 		extern int sysctl_overcommit_memory;
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 95bbe112965..c4f6f05198e 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -1,7 +1,6 @@
-/* the upper-most page table pointer */
-
 #ifdef CONFIG_MMU
 
+/* the upper-most page table pointer */
 extern pmd_t *top_pmd;
 
 #define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 5358fcc7f61..f7457fea6de 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -124,7 +124,7 @@ int valid_phys_addr_range(unsigned long addr, size_t size)
 {
 	if (addr < PHYS_OFFSET)
 		return 0;
-	if (addr + size > __pa(high_memory))
+	if (addr + size >= __pa(high_memory - 1))
 		return 0;
 
 	return 1;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d4d082c5c2d..b438fc4fb77 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -18,9 +18,11 @@
 #include <asm/cputype.h>
 #include <asm/mach-types.h>
 #include <asm/sections.h>
+#include <asm/cachetype.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/highmem.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -243,6 +245,10 @@ static struct mem_type mem_types[] = {
 		.prot_sect = PMD_TYPE_SECT,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_NONCACHED] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+		.domain    = DOMAIN_KERNEL,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
@@ -406,9 +412,28 @@ static void __init build_mem_type_table(void)
 		kern_pgprot |= L_PTE_SHARED;
 		vecs_pgprot |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
 #endif
 	}
 
+	/*
+	 * Non-cacheable Normal - intended for memory areas that must
+	 * not cause dirty cache line writebacks when used
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv6) {
+		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+			/* Non-cacheable Normal is XCB = 001 */
+			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+				PMD_SECT_BUFFERED;
+		} else {
+			/* For both ARMv6 and non-TEX-remapping ARMv7 */
+			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+				PMD_SECT_TEX(1);
+		}
+	} else {
+		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+	}
+
 	for (i = 0; i < 16; i++) {
 		unsigned long v = pgprot_val(protection_map[i]);
 		protection_map[i] = __pgprot(v | user_pgprot);
@@ -677,6 +702,10 @@ static void __init sanity_check_meminfo(void)
 			if (meminfo.nr_banks >= NR_BANKS) {
 				printk(KERN_CRIT "NR_BANKS too low, "
 						 "ignoring high memory\n");
+			} else if (cache_is_vipt_aliasing()) {
+				printk(KERN_CRIT "HIGHMEM is not yet supported "
+						 "with VIPT aliasing cache, "
+						 "ignoring high memory\n");
 			} else {
 				memmove(bank + 1, bank,
 					(meminfo.nr_banks - i) * sizeof(*bank));
@@ -694,7 +723,7 @@ static void __init sanity_check_meminfo(void)
 		 * the vmalloc area.
 		 */
 		if (__va(bank->start) >= VMALLOC_MIN ||
-		    __va(bank->start) < PAGE_OFFSET) {
+		    __va(bank->start) < (void *)PAGE_OFFSET) {
 			printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
 			       "(vmalloc region overlap).\n",
 			       bank->start, bank->start + bank->size - 1);
@@ -895,6 +924,17 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	flush_cache_all();
 }
 
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	pmd_t *pmd = pmd_off_k(PKMAP_BASE);
+	pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
+	BUG_ON(!pmd_none(*pmd) || !pte);
+	__pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
+	pkmap_page_table = pte + PTRS_PER_PTE;
+#endif
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
@@ -908,6 +948,7 @@ void __init paging_init(struct machine_desc *mdesc)
 	prepare_page_table();
 	bootmem_init();
 	devicemaps_init(mdesc);
+	kmap_init();
 
 	top_pmd = pmd_off_k(0xffff0000);
 
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 00000000000..08b8a955d5d
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,248 @@
+/*
+ *  linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ *  Written by : Luke Lee
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE	16
+
+	.text
+/*
+ * cpu_fa526_proc_init()
+ */
+ENTRY(cpu_fa526_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_fa526_proc_fin()
+ */
+ENTRY(cpu_fa526_proc_fin)
+	stmfd	sp!, {lr}
+	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	msr	cpsr_c, ip
+	bl	fa_flush_kern_cache_all
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	nop
+	nop
+	ldmfd	sp!, {pc}
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	4
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	bic	ip, ip, #0x0800			@ BTB off
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	nop
+	nop
+	mov	pc, r0
+
+/*
+ * cpu_fa526_do_idle()
+ */
+	.align	4
+ENTRY(cpu_fa526_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	mov	pc, lr
+
+
+ENTRY(cpu_fa526_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+	mcr	p15, 0, ip, c7, c14, 0		@ clean and invalidate whole D cache
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c5, 6		@ invalidate BTB since mm changed
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate UTLB
+#endif
+	mov	pc, lr
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	mov	pc, lr
+
+	__INIT
+
+	.type	__fa526_setup, #function
+__fa526_setup:
+	/* On return of this routine, r0 must carry correct flags for CFG register */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	mcr	p15, 0, r0, c7, c5, 5		@ invalidate IScratchpad RAM
+
+	mov	r0, #1
+	mcr	p15, 0, r0, c1, c1, 0		@ turn-on ECR
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB All
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+
+	mov	r0, #0x1f			@ Domains 0, 1 = manager, 2 = client
+	mcr	p15, 0, r0, c3, c0		@ load domain access register
+
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	ldr	r5, fa526_cr1_clear
+	bic	r0, r0, r5
+	ldr	r5, fa526_cr1_set
+	orr	r0, r0, r5
+	mov	pc, lr
+	.size	__fa526_setup, . - __fa526_setup
+
+	/*
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 1001 .111 1101
+	 *
+	 */
+	.type	fa526_cr1_clear, #object
+	.type	fa526_cr1_set, #object
+fa526_cr1_clear:
+	.word	0x3f3f
+fa526_cr1_set:
+	.word	0x397D
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+	.type	fa526_processor_functions, #object
+fa526_processor_functions:
+	.word	v4_early_abort
+	.word	pabort_noifar
+	.word	cpu_fa526_proc_init
+	.word	cpu_fa526_proc_fin
+	.word	cpu_fa526_reset
+	.word   cpu_fa526_do_idle
+	.word	cpu_fa526_dcache_clean_area
+	.word	cpu_fa526_switch_mm
+	.word	cpu_fa526_set_pte_ext
+	.size	fa526_processor_functions, . - fa526_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv4"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v4"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	.type	cpu_fa526_name, #object
+cpu_fa526_name:
+	.asciz	"FA526"
+	.size	cpu_fa526_name, . - cpu_fa526_name
+
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	.type	__fa526_proc_info,#object
+__fa526_proc_info:
+	.long	0x66015261
+	.long	0xff01fff1
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__fa526_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF
+	.long	cpu_fa526_name
+	.long	fa526_processor_functions
+	.long	fa_tlb_fns
+	.long	fa_user_fns
+	.long	fa_cache_fns
+	.size	__fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
new file mode 100644
index 00000000000..540f5078496
--- /dev/null
+++ b/arch/arm/mm/proc-mohawk.S
@@ -0,0 +1,416 @@
+/*
+ *  linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core
+ *
+ *  PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core.
+ *
+ *  Heavily based on proc-arm926.S and proc-xsc3.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define CACHE_DLIMIT	32768
+
+/*
+ * The cache line size of the L1 D cache.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * cpu_mohawk_proc_init()
+ */
+ENTRY(cpu_mohawk_proc_init)
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_proc_fin()
+ */
+ENTRY(cpu_mohawk_proc_fin)
+	stmfd	sp!, {lr}
+	mov	ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	msr	cpsr_c, ip
+	bl	mohawk_flush_kern_cache_all
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...iz...........
+	bic	r0, r0, #0x0006			@ .............ca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ldmfd	sp!, {pc}
+
+/*
+ * cpu_mohawk_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ *
+ * (same as arm926)
+ */
+	.align	5
+ENTRY(cpu_mohawk_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x0007			@ .............cam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	mov	pc, r0
+
+/*
+ * cpu_mohawk_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	5
+ENTRY(cpu_mohawk_do_idle)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c0, 4		@ wait for interrupt
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(mohawk_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(mohawk_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 0		@ drain write buffer
+	mov	pc, lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	flush_kern_dcache_page(void *page)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- page aligned address
+ */
+ENTRY(mohawk_flush_kern_dcache_page)
+	add	r1, r0, #PAGE_SZ
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_inv_range)
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+ENTRY(mohawk_dma_clean_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+ENTRY(mohawk_cache_fns)
+	.long	mohawk_flush_kern_cache_all
+	.long	mohawk_flush_user_cache_all
+	.long	mohawk_flush_user_cache_range
+	.long	mohawk_coherent_kern_range
+	.long	mohawk_coherent_user_range
+	.long	mohawk_flush_kern_dcache_page
+	.long	mohawk_dma_inv_range
+	.long	mohawk_dma_clean_range
+	.long	mohawk_dma_flush_range
+
+ENTRY(cpu_mohawk_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_mohawk_switch_mm)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	orr	r0, r0, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mov	pc, lr
+
+/*
+ * cpu_mohawk_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_mohawk_set_pte_ext)
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	pc, lr
+
+	__INIT
+
+	.type	__mohawk_setup, #function
+__mohawk_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs
+	orr	r4, r4, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+
+	mov	r0, #0				@ don't allow CP access
+	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
+
+	adr	r5, mohawk_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	mov	pc, lr
+
+	.size	__mohawk_setup, . - __mohawk_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..00 0101
+	 *
+	 */
+	.type	mohawk_crval, #object
+mohawk_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134
+
+	__INITDATA
+
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+	.type	mohawk_processor_functions, #object
+mohawk_processor_functions:
+	.word	v5t_early_abort
+	.word	pabort_noifar
+	.word	cpu_mohawk_proc_init
+	.word	cpu_mohawk_proc_fin
+	.word	cpu_mohawk_reset
+	.word	cpu_mohawk_do_idle
+	.word	cpu_mohawk_dcache_clean_area
+	.word	cpu_mohawk_switch_mm
+	.word	cpu_mohawk_set_pte_ext
+	.size	mohawk_processor_functions, . - mohawk_processor_functions
+
+	.section ".rodata"
+
+	.type	cpu_arch_name, #object
+cpu_arch_name:
+	.asciz	"armv5te"
+	.size	cpu_arch_name, . - cpu_arch_name
+
+	.type	cpu_elf_name, #object
+cpu_elf_name:
+	.asciz	"v5"
+	.size	cpu_elf_name, . - cpu_elf_name
+
+	.type	cpu_mohawk_name, #object
+cpu_mohawk_name:
+	.asciz	"Marvell 88SV331x"
+	.size	cpu_mohawk_name, . - cpu_mohawk_name
+
+	.align
+
+	.section ".proc.info.init", #alloc, #execinstr
+
+	.type	__88sv331x_proc_info,#object
+__88sv331x_proc_info:
+	.long	0x56158000			@ Marvell 88SV331x (MOHAWK)
+	.long	0xfffff000
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	b	__mohawk_setup
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_mohawk_name
+	.long	mohawk_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	mohawk_cache_fns
+	.size	__88sv331x_proc_info, . - __88sv331x_proc_info
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 00000000000..9694f1f6f48
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,75 @@
+/*
+ *  linux/arch/arm/mm/tlb-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4, Faraday variation.
+ *  This assume an unified TLBs, with a write buffer, and branch target buffer (BTB)
+ *
+ *  Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ *	flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	4
+ENTRY(fa_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	movne	pc, lr				@ no, we dont do anything
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	mov	pc, lr
+
+
+ENTRY(fa_flush_kern_tlb_range)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush
+	mov	pc, lr
+
+	__INITDATA
+
+	.type	fa_tlb_fns, #object
+ENTRY(fa_tlb_fns)
+	.long	fa_flush_user_tlb_range
+	.long	fa_flush_kern_tlb_range
+	.long	fa_tlb_flags
+	.size	fa_tlb_fns, . - fa_tlb_fns