Diffstat (limited to 'arch/sh/mm')
-rw-r--r--  arch/sh/mm/Kconfig        |  19
-rw-r--r--  arch/sh/mm/Makefile       |   3
-rw-r--r--  arch/sh/mm/cache-sh4.c    | 501
-rw-r--r--  arch/sh/mm/cache-sh5.c    |   2
-rw-r--r--  arch/sh/mm/cache-sh7705.c |   2
-rw-r--r--  arch/sh/mm/cache.c        |  18
-rw-r--r--  arch/sh/mm/consistent.c   |  28
-rw-r--r--  arch/sh/mm/init.c         |  19
-rw-r--r--  arch/sh/mm/kmap.c         |   4
-rw-r--r--  arch/sh/mm/numa.c         |   2
-rw-r--r--  arch/sh/mm/pmb-fixed.c    |  45
-rw-r--r--  arch/sh/mm/pmb.c          | 268
12 files changed, 276 insertions, 635 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 7f7b52f9beb..0e7ba8e891c 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -82,8 +82,7 @@ config 32BIT config PMB_ENABLE bool "Support 32-bit physical addressing through PMB" - depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785) - select 32BIT + depends on MMU && EXPERIMENTAL && CPU_SH4A default y help If you say Y here, physical addressing will be extended to @@ -97,8 +96,7 @@ choice config PMB bool "PMB" - depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785) - select 32BIT + depends on MMU && EXPERIMENTAL && CPU_SH4A help If you say Y here, physical addressing will be extended to 32-bits through the SH-4A PMB. If this is not set, legacy @@ -106,9 +104,7 @@ config PMB config PMB_FIXED bool "fixed PMB" - depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || \ - CPU_SUBTYPE_SH7780 || \ - CPU_SUBTYPE_SH7785) + depends on MMU && EXPERIMENTAL && CPU_SH4A select 32BIT help If this option is enabled, fixed PMB mappings are inherited @@ -258,6 +254,15 @@ endchoice source "mm/Kconfig" +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + endmenu menu "Cache configuration" diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index 3759bf85329..8a70535fa7c 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -33,8 +33,7 @@ obj-y += $(tlb-y) endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PMB) += pmb.o -obj-$(CONFIG_PMB_FIXED) += pmb-fixed.o +obj-$(CONFIG_PMB_ENABLE) += pmb.o obj-$(CONFIG_NUMA) += numa.o # Special flags for fault_64.o. This puts restrictions on the number of diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index b7f235c74d6..f36a08bf3d5 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -2,7 +2,7 @@ * arch/sh/mm/cache-sh4.c * * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2001 - 2007 Paul Mundt + * Copyright (C) 2001 - 2009 Paul Mundt * Copyright (C) 2003 Richard Curnow * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. * @@ -15,6 +15,8 @@ #include <linux/io.h> #include <linux/mutex.h> #include <linux/fs.h> +#include <linux/highmem.h> +#include <asm/pgtable.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> @@ -23,21 +25,12 @@ * flushing. Anything exceeding this will simply flush the dcache in its * entirety. */ -#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ #define MAX_ICACHE_PAGES 32 static void __flush_cache_one(unsigned long addr, unsigned long phys, unsigned long exec_offset); /* - * This is initialised here to ensure that it is not placed in the BSS. If - * that were to happen, note that cache_init gets called before the BSS is - * cleared, so this would get nulled out which would be hopeless. - */ -static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = - (void (*)(unsigned long, unsigned long))0xdeadbeef; - -/* * Write back the range of D-cache, and purge the I-cache. * * Called from kernel/module.c:sys_init_module and routine for a.out format, @@ -97,15 +90,15 @@ static inline void flush_cache_one(unsigned long start, unsigned long phys) unsigned long flags, exec_offset = 0; /* - * All types of SH-4 require PC to be in P2 to operate on the I-cache. 
- * Some types of SH-4 require PC to be in P2 to operate on the D-cache. + * All types of SH-4 require PC to be uncached to operate on the I-cache. + * Some types of SH-4 require PC to be uncached to operate on the D-cache. */ if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) || (start < CACHE_OC_ADDRESS_ARRAY)) - exec_offset = 0x20000000; + exec_offset = cached_to_uncached; local_irq_save(flags); - __flush_cache_one(start | SH_CACHE_ASSOC, P1SEGADDR(phys), exec_offset); + __flush_cache_one(start, phys, exec_offset); local_irq_restore(flags); } @@ -124,7 +117,7 @@ static void sh4_flush_dcache_page(void *arg) else #endif { - unsigned long phys = PHYSADDR(page_address(page)); + unsigned long phys = page_to_phys(page); unsigned long addr = CACHE_OC_ADDRESS_ARRAY; int i, n; @@ -159,10 +152,27 @@ static void __uses_jump_to_uncached flush_icache_all(void) local_irq_restore(flags); } -static inline void flush_dcache_all(void) +static void flush_dcache_all(void) { - (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size); - wmb(); + unsigned long addr, end_addr, entry_offset; + + end_addr = CACHE_OC_ADDRESS_ARRAY + + (current_cpu_data.dcache.sets << + current_cpu_data.dcache.entry_shift) * + current_cpu_data.dcache.ways; + + entry_offset = 1 << current_cpu_data.dcache.entry_shift; + + for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) { + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + } } static void sh4_flush_cache_all(void *unused) @@ -171,89 +181,13 @@ static void sh4_flush_cache_all(void *unused) flush_icache_all(); } -static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - unsigned long d = 0, p = start & PAGE_MASK; - unsigned long alias_mask = boot_cpu_data.dcache.alias_mask; - unsigned long n_aliases = boot_cpu_data.dcache.n_aliases; - unsigned long select_bit; - unsigned long all_aliases_mask; - unsigned long addr_offset; - pgd_t *dir; - pmd_t *pmd; - pud_t *pud; - pte_t *pte; - int i; - - dir = pgd_offset(mm, p); - pud = pud_offset(dir, p); - pmd = pmd_offset(pud, p); - end = PAGE_ALIGN(end); - - all_aliases_mask = (1 << n_aliases) - 1; - - do { - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { - p &= PMD_MASK; - p += PMD_SIZE; - pmd++; - - continue; - } - - pte = pte_offset_kernel(pmd, p); - - do { - unsigned long phys; - pte_t entry = *pte; - - if (!(pte_val(entry) & _PAGE_PRESENT)) { - pte++; - p += PAGE_SIZE; - continue; - } - - phys = pte_val(entry) & PTE_PHYS_MASK; - - if ((p ^ phys) & alias_mask) { - d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); - d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); - - if (d == all_aliases_mask) - goto loop_exit; - } - - pte++; - p += PAGE_SIZE; - } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); - pmd++; - } while (p < end); - -loop_exit: - addr_offset = 0; - select_bit = 1; - - for (i = 0; i < n_aliases; i++) { - if (d & select_bit) { - (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); - wmb(); - } - - select_bit <<= 1; - addr_offset += PAGE_SIZE; - } -} - /* * Note : (RPC) since the caches are physically tagged, the only point * of flush_cache_mm for SH-4 is to get rid of aliases from the * D-cache. The assumption elsewhere, e.g. 
flush_cache_range, is that * lines can stay resident so long as the virtual address they were * accessed with (hence cache set) is in accord with the physical - * address (i.e. tag). It's no different here. So I reckon we don't - * need to flush the I-cache, since aliases don't matter for that. We - * should try that. + * address (i.e. tag). It's no different here. * * Caller takes mm->mmap_sem. */ @@ -264,33 +198,7 @@ static void sh4_flush_cache_mm(void *arg) if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT) return; - /* - * If cache is only 4k-per-way, there are never any 'aliases'. Since - * the cache is physically tagged, the data can just be left in there. - */ - if (boot_cpu_data.dcache.n_aliases == 0) - return; - - /* - * Don't bother groveling around the dcache for the VMA ranges - * if there are too many PTEs to make it worthwhile. - */ - if (mm->nr_ptes >= MAX_DCACHE_PAGES) - flush_dcache_all(); - else { - struct vm_area_struct *vma; - - /* - * In this case there are reasonably sized ranges to flush, - * iterate through the VMA list and take care of any aliases. - */ - for (vma = mm->mmap; vma; vma = vma->vm_next) - __flush_cache_mm(mm, vma->vm_start, vma->vm_end); - } - - /* Only touch the icache if one of the VMAs has VM_EXEC set. */ - if (mm->exec_vm) - flush_icache_all(); + flush_dcache_all(); } /* @@ -303,44 +211,63 @@ static void sh4_flush_cache_page(void *args) { struct flusher_data *data = args; struct vm_area_struct *vma; + struct page *page; unsigned long address, pfn, phys; - unsigned int alias_mask; + int map_coherent = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + void *vaddr; vma = data->vma; - address = data->addr1; + address = data->addr1 & PAGE_MASK; pfn = data->addr2; phys = pfn << PAGE_SHIFT; + page = pfn_to_page(pfn); if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) return; - alias_mask = boot_cpu_data.dcache.alias_mask; - - /* We only need to flush D-cache when we have alias */ - if ((address^phys) & alias_mask) { - /* Loop 4K of the D-cache */ - flush_cache_one( - CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), - phys); - /* Loop another 4K of the D-cache */ - flush_cache_one( - CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), - phys); - } + pgd = pgd_offset(vma->vm_mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + + /* If the page isn't present, there is nothing to do here. */ + if (!(pte_val(*pte) & _PAGE_PRESENT)) + return; - alias_mask = boot_cpu_data.icache.alias_mask; - if (vma->vm_flags & VM_EXEC) { + if ((vma->vm_mm == current->active_mm)) + vaddr = NULL; + else { /* - * Evict entries from the portion of the cache from which code - * may have been executed at this address (virtual). There's - * no need to evict from the portion corresponding to the - * physical address as for the D-cache, because we know the - * kernel has never executed the code through its identity - * translation. + * Use kmap_coherent or kmap_atomic to do flushes for + * another ASID than the current one. 
*/ - flush_cache_one( - CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), - phys); + map_coherent = (current_cpu_data.dcache.n_aliases && + !test_bit(PG_dcache_dirty, &page->flags) && + page_mapped(page)); + if (map_coherent) + vaddr = kmap_coherent(page, address); + else + vaddr = kmap_atomic(page, KM_USER0); + + address = (unsigned long)vaddr; + } + + if (pages_do_alias(address, phys)) + flush_cache_one(CACHE_OC_ADDRESS_ARRAY | + (address & shm_align_mask), phys); + + if (vma->vm_flags & VM_EXEC) + flush_icache_all(); + + if (vaddr) { + if (map_coherent) + kunmap_coherent(vaddr); + else + kunmap_atomic(vaddr, KM_USER0); } } @@ -373,24 +300,10 @@ static void sh4_flush_cache_range(void *args) if (boot_cpu_data.dcache.n_aliases == 0) return; - /* - * Don't bother with the lookup and alias check if we have a - * wide range to cover, just blow away the dcache in its - * entirety instead. -- PFM. - */ - if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) - flush_dcache_all(); - else - __flush_cache_mm(vma->vm_mm, start, end); + flush_dcache_all(); - if (vma->vm_flags & VM_EXEC) { - /* - * TODO: Is this required??? Need to look at how I-cache - * coherency is assured when new programs are loaded to see if - * this matters. - */ + if (vma->vm_flags & VM_EXEC) flush_icache_all(); - } } /** @@ -464,245 +377,6 @@ static void __flush_cache_one(unsigned long addr, unsigned long phys, } while (--way_count != 0); } -/* - * Break the 1, 2 and 4 way variants of this out into separate functions to - * avoid nearly all the overhead of having the conditional stuff in the function - * bodies (+ the 1 and 2 way cases avoid saving any registers too). - * - * We want to eliminate unnecessary bus transactions, so this code uses - * a non-obvious technique. - * - * Loop over a cache way sized block of, one cache line at a time. For each - * line, use movca.a to cause the current cache line contents to be written - * back, but without reading anything from main memory. However this has the - * side effect that the cache is now caching that memory location. So follow - * this with a cache invalidate to mark the cache line invalid. And do all - * this with interrupts disabled, to avoid the cache line being accidently - * evicted while it is holding garbage. - * - * This also breaks in a number of circumstances: - * - if there are modifications to the region of memory just above - * empty_zero_page (for example because a breakpoint has been placed - * there), then these can be lost. - * - * This is because the the memory address which the cache temporarily - * caches in the above description is empty_zero_page. So the - * movca.l hits the cache (it is assumed that it misses, or at least - * isn't dirty), modifies the line and then invalidates it, losing the - * required change. - * - * - If caches are disabled or configured in write-through mode, then - * the movca.l writes garbage directly into memory. 
- */ -static void __flush_dcache_segment_writethrough(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long addr; - int i; - - addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask); - - while (extent_per_way) { - for (i = 0; i < cpu_data->dcache.ways; i++) - __raw_writel(0, addr + cpu_data->dcache.way_incr * i); - - addr += cpu_data->dcache.linesz; - extent_per_way -= cpu_data->dcache.linesz; - } -} - -static void __flush_dcache_segment_1way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct cache_info *dcache; - register unsigned long a0, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* - * The previous code aligned base_addr to 16k, i.e. the way_size of all - * existing SH-4 D-caches. Whilst I don't see a need to have this - * aligned to any better than the cache line size (which it will be - * anyway by construction), let's align it to at least the way_size of - * any existing or conceivable SH-4 D-cache. -- RPC - */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - asm volatile("ldc %0, sr" : : "r" (orig_sr)); - a0 += linesz; - } while (a0 < a0e); -} - -static void __flush_dcache_segment_2way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct cache_info *dcache; - register unsigned long a0, a1, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* See comment under 1-way above */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a1 = a0 + way_incr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - asm volatile("ldc %0, sr" : : "r" (orig_sr)); - a0 += linesz; - a1 += linesz; - } while (a0 < a0e); -} - -static void __flush_dcache_segment_4way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct 
cache_info *dcache; - register unsigned long a0, a1, a2, a3, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* See comment under 1-way above */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a1 = a0 + way_incr; - a2 = a1 + way_incr; - a3 = a2 + way_incr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - asm volatile("ldc %0, sr" : : "r" (orig_sr)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - } while (a0 < a0e); -} - extern void __weak sh4__flush_region_init(void); /* @@ -710,32 +384,11 @@ extern void __weak sh4__flush_region_init(void); */ void __init sh4_cache_init(void) { - unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT); - printk("PVR=%08x CVR=%08x PRR=%08x\n", ctrl_inl(CCN_PVR), ctrl_inl(CCN_CVR), ctrl_inl(CCN_PRR)); - if (wt_enabled) - __flush_dcache_segment_fn = __flush_dcache_segment_writethrough; - else { - switch (boot_cpu_data.dcache.ways) { - case 1: - __flush_dcache_segment_fn = __flush_dcache_segment_1way; - break; - case 2: - __flush_dcache_segment_fn = __flush_dcache_segment_2way; - break; - case 4: - __flush_dcache_segment_fn = __flush_dcache_segment_4way; - break; - default: - panic("unknown number of cache ways\n"); - break; - } - } - local_flush_icache_range = sh4_flush_icache_range; local_flush_dcache_page = sh4_flush_dcache_page; local_flush_cache_all = sh4_flush_cache_all; diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c index 467ff8e260f..eb4cc4ec795 100644 --- a/arch/sh/mm/cache-sh5.c +++ b/arch/sh/mm/cache-sh5.c @@ -563,7 +563,7 @@ static void sh5_flush_cache_page(void *args) static void sh5_flush_dcache_page(void *page) { - sh64_dcache_purge_phy_page(page_to_phys(page)); + sh64_dcache_purge_phy_page(page_to_phys((struct page *)page)); wmb(); } diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index 2601935eb58..f527fb70fce 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -141,7 +141,7 @@ static void sh7705_flush_dcache_page(void *arg) if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else - __flush_dcache_page(PHYSADDR(page_address(page))); + 
__flush_dcache_page(__pa(page_address(page))); } static void __uses_jump_to_uncached sh7705_flush_cache_all(void *args) diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index a2dc7f9ecc5..e9415d3ea94 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -27,8 +27,11 @@ void (*local_flush_icache_page)(void *args) = cache_noop; void (*local_flush_cache_sigtramp)(void *args) = cache_noop; void (*__flush_wback_region)(void *start, int size); +EXPORT_SYMBOL(__flush_wback_region); void (*__flush_purge_region)(void *start, int size); +EXPORT_SYMBOL(__flush_purge_region); void (*__flush_invalidate_region)(void *start, int size); +EXPORT_SYMBOL(__flush_invalidate_region); static inline void noop__flush_region(void *start, int size) { @@ -161,14 +164,21 @@ void flush_cache_all(void) { cacheop_on_each_cpu(local_flush_cache_all, NULL, 1); } +EXPORT_SYMBOL(flush_cache_all); void flush_cache_mm(struct mm_struct *mm) { + if (boot_cpu_data.dcache.n_aliases == 0) + return; + cacheop_on_each_cpu(local_flush_cache_mm, mm, 1); } void flush_cache_dup_mm(struct mm_struct *mm) { + if (boot_cpu_data.dcache.n_aliases == 0) + return; + cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1); } @@ -195,11 +205,13 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1); } +EXPORT_SYMBOL(flush_cache_range); void flush_dcache_page(struct page *page) { cacheop_on_each_cpu(local_flush_dcache_page, page, 1); } +EXPORT_SYMBOL(flush_dcache_page); void flush_icache_range(unsigned long start, unsigned long end) { @@ -265,7 +277,11 @@ static void __init emit_cache_params(void) void __init cpu_cache_init(void) { - unsigned int cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE); + unsigned int cache_disabled = 0; + +#ifdef CCR + cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE); +#endif compute_alias(&boot_cpu_data.icache); compute_alias(&boot_cpu_data.dcache); diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index e098ec158dd..902967e3f84 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -15,11 +15,15 @@ #include <linux/dma-mapping.h> #include <linux/dma-debug.h> #include <linux/io.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/addrspace.h> #define PREALLOC_DMA_DEBUG_ENTRIES 4096 +struct dma_map_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); @@ -27,15 +31,12 @@ static int __init dma_init(void) } fs_initcall(dma_init); -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) { void *ret, *ret_nocache; int order = get_order(size); - if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) - return ret; - ret = (void *)__get_free_pages(gfp, order); if (!ret) return NULL; @@ -57,35 +58,26 @@ void *dma_alloc_coherent(struct device *dev, size_t size, *dma_handle = virt_to_phys(ret); - debug_dma_alloc_coherent(dev, size, *dma_handle, ret_nocache); - return ret_nocache; } -EXPORT_SYMBOL(dma_alloc_coherent); -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) +void dma_generic_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) { int order = get_order(size); unsigned long pfn = dma_handle >> PAGE_SHIFT; int k; - WARN_ON(irqs_disabled()); /* for portability */ - - if 
(dma_release_from_coherent(dev, order, vaddr)) - return; - - debug_dma_free_coherent(dev, size, vaddr, dma_handle); for (k = 0; k < (1 << order); k++) __free_pages(pfn_to_page(pfn + k), 0); + iounmap(vaddr); } -EXPORT_SYMBOL(dma_free_coherent); void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { -#ifdef CONFIG_CPU_SH5 +#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB) void *p1addr = vaddr; #else void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 8173e38afd3..432acd07e76 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -15,6 +15,7 @@ #include <linux/pagemap.h> #include <linux/percpu.h> #include <linux/io.h> +#include <linux/dma-mapping.h> #include <asm/mmu_context.h> #include <asm/tlb.h> #include <asm/cacheflush.h> @@ -186,11 +187,21 @@ void __init paging_init(void) set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start)); } +/* + * Early initialization for any I/O MMUs we might have. + */ +static void __init iommu_init(void) +{ + no_iommu_init(); +} + void __init mem_init(void) { int codesize, datasize, initsize; int nid; + iommu_init(); + num_physpages = 0; high_memory = NULL; @@ -323,4 +334,12 @@ int memory_add_physaddr_to_nid(u64 addr) } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif + #endif /* CONFIG_MEMORY_HOTPLUG */ + +#ifdef CONFIG_PMB +int __in_29bit_mode(void) +{ + return !(ctrl_inl(PMB_PASCR) & PASCR_SE); +} +#endif /* CONFIG_PMB */ diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c index 16e01b5fed0..15d74ea4209 100644 --- a/arch/sh/mm/kmap.c +++ b/arch/sh/mm/kmap.c @@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr) pagefault_disable(); idx = FIX_CMAP_END - - ((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT); + (((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) + + (FIX_N_COLOURS * smp_processor_id())); + vaddr = __fix_to_virt(idx); BUG_ON(!pte_none(*(kmap_coherent_pte - idx))); diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 9b784fdb947..6c524446c0f 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -60,7 +60,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) unsigned long bootmem_paddr; /* Don't allow bogus node assignment */ - BUG_ON(nid > MAX_NUMNODES || nid == 0); + BUG_ON(nid > MAX_NUMNODES || nid <= 0); start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; diff --git a/arch/sh/mm/pmb-fixed.c b/arch/sh/mm/pmb-fixed.c deleted file mode 100644 index 43c8eac4d8a..00000000000 --- a/arch/sh/mm/pmb-fixed.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * arch/sh/mm/fixed_pmb.c - * - * Copyright (C) 2009 Renesas Solutions Corp. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- */ -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/io.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> - -static int __uses_jump_to_uncached fixed_pmb_init(void) -{ - int i; - unsigned long addr, data; - - jump_to_uncached(); - - for (i = 0; i < PMB_ENTRY_MAX; i++) { - addr = PMB_DATA + (i << PMB_E_SHIFT); - data = ctrl_inl(addr); - if (!(data & PMB_V)) - continue; - - if (data & PMB_C) { -#if defined(CONFIG_CACHE_WRITETHROUGH) - data |= PMB_WT; -#elif defined(CONFIG_CACHE_WRITEBACK) - data &= ~PMB_WT; -#else - data &= ~(PMB_C | PMB_WT); -#endif - } - ctrl_outl(data, addr); - } - - back_to_cached(); - - return 0; -} -arch_initcall(fixed_pmb_init); diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index aade3110211..280f6a16603 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -35,29 +35,9 @@ static void __pmb_unmap(struct pmb_entry *); -static struct kmem_cache *pmb_cache; +static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES]; static unsigned long pmb_map; -static struct pmb_entry pmb_init_map[] = { - /* vpn ppn flags (ub/sz/c/wt) */ - - /* P1 Section Mappings */ - { 0x80000000, 0x00000000, PMB_SZ_64M | PMB_C, }, - { 0x84000000, 0x04000000, PMB_SZ_64M | PMB_C, }, - { 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, }, - { 0x90000000, 0x10000000, PMB_SZ_64M | PMB_C, }, - { 0x94000000, 0x14000000, PMB_SZ_64M | PMB_C, }, - { 0x98000000, 0x18000000, PMB_SZ_64M | PMB_C, }, - - /* P2 Section Mappings */ - { 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, }, - { 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, - { 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M | PMB_WT, }, -}; - static inline unsigned long mk_pmb_entry(unsigned int entry) { return (entry & PMB_E_MASK) << PMB_E_SHIFT; @@ -73,81 +53,68 @@ static inline unsigned long mk_pmb_data(unsigned int entry) return mk_pmb_entry(entry) | PMB_DATA; } -static DEFINE_SPINLOCK(pmb_list_lock); -static struct pmb_entry *pmb_list; - -static inline void pmb_list_add(struct pmb_entry *pmbe) +static int pmb_alloc_entry(void) { - struct pmb_entry **p, *tmp; + unsigned int pos; - p = &pmb_list; - while ((tmp = *p) != NULL) - p = &tmp->next; +repeat: + pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES); - pmbe->next = tmp; - *p = pmbe; -} + if (unlikely(pos > NR_PMB_ENTRIES)) + return -ENOSPC; -static inline void pmb_list_del(struct pmb_entry *pmbe) -{ - struct pmb_entry **p, *tmp; + if (test_and_set_bit(pos, &pmb_map)) + goto repeat; - for (p = &pmb_list; (tmp = *p); p = &tmp->next) - if (tmp == pmbe) { - *p = tmp->next; - return; - } + return pos; } -struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, - unsigned long flags) +static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, + unsigned long flags, int entry) { struct pmb_entry *pmbe; + int pos; + + if (entry == PMB_NO_ENTRY) { + pos = pmb_alloc_entry(); + if (pos < 0) + return ERR_PTR(pos); + } else { + if (test_bit(entry, &pmb_map)) + return ERR_PTR(-ENOSPC); + pos = entry; + } - pmbe = kmem_cache_alloc(pmb_cache, GFP_KERNEL); + pmbe = &pmb_entry_list[pos]; if (!pmbe) return ERR_PTR(-ENOMEM); pmbe->vpn = vpn; pmbe->ppn = ppn; pmbe->flags = flags; - - spin_lock_irq(&pmb_list_lock); - pmb_list_add(pmbe); - spin_unlock_irq(&pmb_list_lock); + pmbe->entry = pos; return pmbe; } -void pmb_free(struct pmb_entry *pmbe) +static void pmb_free(struct pmb_entry 
*pmbe) { - spin_lock_irq(&pmb_list_lock); - pmb_list_del(pmbe); - spin_unlock_irq(&pmb_list_lock); + int pos = pmbe->entry; - kmem_cache_free(pmb_cache, pmbe); + pmbe->vpn = 0; + pmbe->ppn = 0; + pmbe->flags = 0; + pmbe->entry = 0; + + clear_bit(pos, &pmb_map); } /* * Must be in P2 for __set_pmb_entry() */ -int __set_pmb_entry(unsigned long vpn, unsigned long ppn, - unsigned long flags, int *entry) +static void __set_pmb_entry(unsigned long vpn, unsigned long ppn, + unsigned long flags, int pos) { - unsigned int pos = *entry; - - if (unlikely(pos == PMB_NO_ENTRY)) - pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES); - -repeat: - if (unlikely(pos > NR_PMB_ENTRIES)) - return -ENOSPC; - - if (test_and_set_bit(pos, &pmb_map)) { - pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES); - goto repeat; - } - ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos)); #ifdef CONFIG_CACHE_WRITETHROUGH @@ -161,35 +128,21 @@ repeat: #endif ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos)); - - *entry = pos; - - return 0; } -int __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe) +static void __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe) { - int ret; - jump_to_uncached(); - ret = __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &pmbe->entry); + __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, pmbe->entry); back_to_cached(); - - return ret; } -void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) +static void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) { unsigned int entry = pmbe->entry; unsigned long addr; - /* - * Don't allow clearing of wired init entries, P1 or P2 access - * without a corresponding mapping in the PMB will lead to reset - * by the TLB. - */ - if (unlikely(entry < ARRAY_SIZE(pmb_init_map) || - entry >= NR_PMB_ENTRIES)) + if (unlikely(entry >= NR_PMB_ENTRIES)) return; jump_to_uncached(); @@ -202,8 +155,6 @@ void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe) ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr); back_to_cached(); - - clear_bit(entry, &pmb_map); } @@ -239,23 +190,17 @@ long pmb_remap(unsigned long vaddr, unsigned long phys, again: for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) { - int ret; - if (size < pmb_sizes[i].size) continue; - pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag); + pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag, + PMB_NO_ENTRY); if (IS_ERR(pmbe)) { err = PTR_ERR(pmbe); goto out; } - ret = set_pmb_entry(pmbe); - if (ret != 0) { - pmb_free(pmbe); - err = -EBUSY; - goto out; - } + set_pmb_entry(pmbe); phys += pmb_sizes[i].size; vaddr += pmb_sizes[i].size; @@ -292,11 +237,16 @@ out: void pmb_unmap(unsigned long addr) { - struct pmb_entry **p, *pmbe; + struct pmb_entry *pmbe = NULL; + int i; - for (p = &pmb_list; (pmbe = *p); p = &pmbe->next) - if (pmbe->vpn == addr) - break; + for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) { + if (test_bit(i, &pmb_map)) { + pmbe = &pmb_entry_list[i]; + if (pmbe->vpn == addr) + break; + } + } if (unlikely(!pmbe)) return; @@ -306,13 +256,22 @@ void pmb_unmap(unsigned long addr) static void __pmb_unmap(struct pmb_entry *pmbe) { - WARN_ON(!test_bit(pmbe->entry, &pmb_map)); + BUG_ON(!test_bit(pmbe->entry, &pmb_map)); do { struct pmb_entry *pmblink = pmbe; - if (pmbe->entry != PMB_NO_ENTRY) - clear_pmb_entry(pmbe); + /* + * We may be called before this pmb_entry has been + * entered into the PMB table via set_pmb_entry(), but + * that's OK because we've allocated a unique slot for + * this entry in pmb_alloc() (even if we haven't filled + * it yet). 
+ * + * Therefore, calling clear_pmb_entry() is safe as no + * other mapping can be using that slot. + */ + clear_pmb_entry(pmbe); pmbe = pmblink->link; @@ -320,42 +279,34 @@ static void __pmb_unmap(struct pmb_entry *pmbe) } while (pmbe); } -static void pmb_cache_ctor(void *pmb) +#ifdef CONFIG_PMB +int __uses_jump_to_uncached pmb_init(void) { - struct pmb_entry *pmbe = pmb; - - memset(pmb, 0, sizeof(struct pmb_entry)); - - pmbe->entry = PMB_NO_ENTRY; -} - -static int __uses_jump_to_uncached pmb_init(void) -{ - unsigned int nr_entries = ARRAY_SIZE(pmb_init_map); - unsigned int entry, i; - - BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES)); - - pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0, - SLAB_PANIC, pmb_cache_ctor); + unsigned int i; + long size, ret; jump_to_uncached(); /* - * Ordering is important, P2 must be mapped in the PMB before we - * can set PMB.SE, and P1 must be mapped before we jump back to - * P1 space. + * Insert PMB entries for the P1 and P2 areas so that, after + * we've switched the MMU to 32-bit mode, the semantics of P1 + * and P2 are the same as in 29-bit mode, e.g. + * + * P1 - provides a cached window onto physical memory + * P2 - provides an uncached window onto physical memory */ - for (entry = 0; entry < nr_entries; entry++) { - struct pmb_entry *pmbe = pmb_init_map + entry; + size = __MEMORY_START + __MEMORY_SIZE; - __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &entry); - } + ret = pmb_remap(P1SEG, 0x00000000, size, PMB_C); + BUG_ON(ret != size); + + ret = pmb_remap(P2SEG, 0x00000000, size, PMB_WT | PMB_UB); + BUG_ON(ret != size); ctrl_outl(0, PMB_IRMCR); /* PMB.SE and UB[7] */ - ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR); + ctrl_outl(PASCR_SE | (1 << 7), PMB_PASCR); /* Flush out the TLB */ i = ctrl_inl(MMUCR); @@ -366,7 +317,53 @@ static int __uses_jump_to_uncached pmb_init(void) return 0; } -arch_initcall(pmb_init); +#else +int __uses_jump_to_uncached pmb_init(void) +{ + int i; + unsigned long addr, data; + + jump_to_uncached(); + + for (i = 0; i < PMB_ENTRY_MAX; i++) { + struct pmb_entry *pmbe; + unsigned long vpn, ppn, flags; + + addr = PMB_DATA + (i << PMB_E_SHIFT); + data = ctrl_inl(addr); + if (!(data & PMB_V)) + continue; + + if (data & PMB_C) { +#if defined(CONFIG_CACHE_WRITETHROUGH) + data |= PMB_WT; +#elif defined(CONFIG_CACHE_WRITEBACK) + data &= ~PMB_WT; +#else + data &= ~(PMB_C | PMB_WT); +#endif + } + ctrl_outl(data, addr); + + ppn = data & PMB_PFN_MASK; + + flags = data & (PMB_C | PMB_WT | PMB_UB); + flags |= data & PMB_SZ_MASK; + + addr = PMB_ADDR + (i << PMB_E_SHIFT); + data = ctrl_inl(addr); + + vpn = data & PMB_PFN_MASK; + + pmbe = pmb_alloc(vpn, ppn, flags, i); + WARN_ON(IS_ERR(pmbe)); + } + + back_to_cached(); + + return 0; +} +#endif /* CONFIG_PMB */ static int pmb_seq_show(struct seq_file *file, void *iter) { @@ -434,15 +431,18 @@ postcore_initcall(pmb_debugfs_init); static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state) { static pm_message_t prev_state; + int i; /* Restore the PMB after a resume from hibernation */ if (state.event == PM_EVENT_ON && prev_state.event == PM_EVENT_FREEZE) { struct pmb_entry *pmbe; - spin_lock_irq(&pmb_list_lock); - for (pmbe = pmb_list; pmbe; pmbe = pmbe->next) - set_pmb_entry(pmbe); - spin_unlock_irq(&pmb_list_lock); + for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) { + if (test_bit(i, &pmb_map)) { + pmbe = &pmb_entry_list[i]; + set_pmb_entry(pmbe); + } + } } prev_state = state; return 0; |
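
The diff to cache-sh4.c replaces the function-pointer D-cache flush variants with a single flat loop over the operand-cache address array, manually unrolled eight entries per iteration. Below is a minimal user-space model of that loop; the cache geometry (256 sets, 2 ways, 32-byte lines) and the heap buffer standing in for CACHE_OC_ADDRESS_ARRAY are illustrative assumptions, not values taken from the patch, and the set count is assumed to be a multiple of 8 so the unrolling divides evenly.

/* sketch: the new flush_dcache_all() walk, modeled in user space */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SETS        256  /* assumed geometry, for illustration only */
#define WAYS        2
#define ENTRY_SHIFT 5    /* 32-byte lines: one array entry per 32 bytes */

int main(void)
{
	size_t span = ((size_t)SETS << ENTRY_SHIFT) * WAYS;
	uint32_t *array = calloc(1, span);  /* stands in for CACHE_OC_ADDRESS_ARRAY */
	uintptr_t addr = (uintptr_t)array;
	uintptr_t end_addr = addr + span;   /* base + (sets << entry_shift) * ways */
	uintptr_t entry_offset = (uintptr_t)1 << ENTRY_SHIFT;

	while (addr < end_addr) {
		/* eight stores per pass, mirroring the patch's unrolling */
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
		*(volatile uint32_t *)addr = 0; addr += entry_offset;
	}

	printf("invalidated %zu address-array entries\n", span >> ENTRY_SHIFT);
	free(array);
	return 0;
}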
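
The pmb.c rework drops the kmem_cache/linked-list bookkeeping in favour of a static pmb_entry_list[] indexed by a bitmap, with pmb_alloc_entry() scanning for a free slot and claiming it atomically. Here is a standalone model of that allocator; it substitutes C11 atomics for the kernel's find_first_zero_bit()/test_and_set_bit(), and the 16-entry table size is an assumption about the SH-4A PMB rather than a value quoted from the patch.

/* sketch: bitmap-backed PMB slot allocation with a race-retry loop */
#include <stdatomic.h>
#include <stdio.h>

#define NR_PMB_ENTRIES 16  /* assumed PMB size for illustration */

static atomic_ulong pmb_map;

static int pmb_alloc_entry(void)
{
	for (;;) {
		unsigned long map = atomic_load(&pmb_map);
		unsigned int pos;

		/* find_first_zero_bit() equivalent */
		for (pos = 0; pos < NR_PMB_ENTRIES; pos++)
			if (!(map & (1UL << pos)))
				break;
		if (pos == NR_PMB_ENTRIES)
			return -1;  /* -ENOSPC in the kernel */

		/* test_and_set_bit(): claim the slot, retry if we lost a race */
		if (!(atomic_fetch_or(&pmb_map, 1UL << pos) & (1UL << pos)))
			return (int)pos;
	}
}

int main(void)
{
	/* one allocation past capacity demonstrates the -ENOSPC path */
	for (int i = 0; i < NR_PMB_ENTRIES + 1; i++)
		printf("pmb_alloc_entry() -> %d\n", pmb_alloc_entry());
	return 0;
}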
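
The new pmb_init() for CONFIG_PMB builds the P1/P2 windows by calling pmb_remap(), which carves a region into PMB entries by repeatedly taking the largest section size that still fits. The sketch below models that greedy descent; the largest-first table ordering and the 192 MiB example region are assumptions for illustration, and leftover space smaller than the smallest section is simply left unmapped here.

/* sketch: greedy PMB section-size selection, as in pmb_remap() */
#include <stdio.h>

static const struct {
	unsigned long size;
	const char *name;
} pmb_sizes[] = {  /* SH-4A PMB section sizes, largest first (assumed order) */
	{ 512UL << 20, "512M" },
	{ 128UL << 20, "128M" },
	{  64UL << 20,  "64M" },
	{  16UL << 20,  "16M" },
};

int main(void)
{
	unsigned long size = 192UL << 20;  /* hypothetical region to map */

	while (size >= pmb_sizes[3].size) {
		for (int i = 0; i < 4; i++) {
			if (size < pmb_sizes[i].size)
				continue;  /* too big for what remains */
			printf("PMB entry: %s\n", pmb_sizes[i].name);
			size -= pmb_sizes[i].size;
			break;
		}
	}
	return 0;  /* 192M -> one 128M entry plus one 64M entry */
}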
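
Finally, the kmap.c change picks the kmap_coherent() fixmap slot from the page colour of the user address plus a per-CPU block of FIX_N_COLOURS slots, so the kernel-side alias lands in the same cache set as the user mapping. A small model of that index calculation follows; the FIX_CMAP_END, FIX_N_COLOURS, and PAGE_SHIFT values are hypothetical stand-ins, not the kernel's definitions.

/* sketch: per-CPU, per-colour fixmap slot selection in kmap_coherent() */
#include <stdio.h>

#define PAGE_SHIFT    12
#define FIX_N_COLOURS 8     /* assumed colours per CPU */
#define FIX_CMAP_END  4095  /* hypothetical top of the colour-map fixmaps */

static unsigned int cmap_idx(unsigned long addr, unsigned int cpu)
{
	/* colour of the user address, offset into this CPU's slot block */
	return FIX_CMAP_END -
	       (((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
	        (FIX_N_COLOURS * cpu));
}

int main(void)
{
	/* same address maps to different slots on different CPUs */
	printf("cpu0, addr 0x12345000 -> idx %u\n", cmap_idx(0x12345000UL, 0));
	printf("cpu1, addr 0x12345000 -> idx %u\n", cmap_idx(0x12345000UL, 1));
	return 0;
}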