From 14cf11af6cf608eb8c23e989ddb17a715ddce109 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 26 Sep 2005 16:04:21 +1000 Subject: powerpc: Merge enough to start building in arch/powerpc. This creates the directory structure under arch/powerpc and a bunch of Kconfig files. It does a first-cut merge of arch/powerpc/mm, arch/powerpc/lib and arch/powerpc/platforms/powermac. This is enough to build a 32-bit powermac kernel with ARCH=powerpc. For now we are getting some unmerged files from arch/ppc/kernel and arch/ppc/syslib, or arch/ppc64/kernel. This makes some minor changes to files in those directories and files outside arch/powerpc. The boot directory is still not merged. That's going to be interesting. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/44x_mmu.c | 120 ++++++++ arch/powerpc/mm/4xx_mmu.c | 141 +++++++++ arch/powerpc/mm/Makefile | 12 + arch/powerpc/mm/fault.c | 391 +++++++++++++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 237 +++++++++++++++ arch/powerpc/mm/hash_32.S | 618 ++++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/init.c | 581 +++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/init64.c | 385 +++++++++++++++++++++++++ arch/powerpc/mm/mem.c | 299 +++++++++++++++++++ arch/powerpc/mm/mem64.c | 259 +++++++++++++++++ arch/powerpc/mm/mem_pieces.c | 163 +++++++++++ arch/powerpc/mm/mem_pieces.h | 48 ++++ arch/powerpc/mm/mmu_context.c | 86 ++++++ arch/powerpc/mm/mmu_context64.c | 63 ++++ arch/powerpc/mm/mmu_decl.h | 85 ++++++ arch/powerpc/mm/pgtable.c | 470 ++++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable64.c | 357 +++++++++++++++++++++++ arch/powerpc/mm/ppc_mmu.c | 296 +++++++++++++++++++ arch/powerpc/mm/tlb.c | 183 ++++++++++++ 19 files changed, 4794 insertions(+) create mode 100644 arch/powerpc/mm/44x_mmu.c create mode 100644 arch/powerpc/mm/4xx_mmu.c create mode 100644 arch/powerpc/mm/Makefile create mode 100644 arch/powerpc/mm/fault.c create mode 100644 arch/powerpc/mm/fsl_booke_mmu.c create mode 100644 arch/powerpc/mm/hash_32.S create mode 100644 arch/powerpc/mm/init.c create mode 100644 arch/powerpc/mm/init64.c create mode 100644 arch/powerpc/mm/mem.c create mode 100644 arch/powerpc/mm/mem64.c create mode 100644 arch/powerpc/mm/mem_pieces.c create mode 100644 arch/powerpc/mm/mem_pieces.h create mode 100644 arch/powerpc/mm/mmu_context.c create mode 100644 arch/powerpc/mm/mmu_context64.c create mode 100644 arch/powerpc/mm/mmu_decl.h create mode 100644 arch/powerpc/mm/pgtable.c create mode 100644 arch/powerpc/mm/pgtable64.c create mode 100644 arch/powerpc/mm/ppc_mmu.c create mode 100644 arch/powerpc/mm/tlb.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c new file mode 100644 index 00000000000..3d79ce281b6 --- /dev/null +++ b/arch/powerpc/mm/44x_mmu.c @@ -0,0 +1,120 @@ +/* + * Modifications by Matt Porter (mporter@mvista.com) to support + * PPC44x Book E processors. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
+ * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mmu_decl.h" + +extern char etext[], _stext[]; + +/* Used by the 44x TLB replacement exception handler. + * Just needed it declared someplace. + */ +unsigned int tlb_44x_index = 0; +unsigned int tlb_44x_hwater = 62; + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem + */ +static void __init +ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys) +{ + unsigned long attrib = 0; + + __asm__ __volatile__("\ + clrrwi %2,%2,10\n\ + ori %2,%2,%4\n\ + clrrwi %1,%1,10\n\ + li %0,0\n\ + ori %0,%0,%5\n\ + tlbwe %2,%3,%6\n\ + tlbwe %1,%3,%7\n\ + tlbwe %0,%3,%8" + : + : "r" (attrib), "r" (phys), "r" (virt), "r" (slot), + "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M), + "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + "i" (PPC44x_TLB_PAGEID), + "i" (PPC44x_TLB_XLAT), + "i" (PPC44x_TLB_ATTRIB)); +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned int pinned_tlbs = 1; + int i; + + /* Determine number of entries necessary to cover lowmem */ + pinned_tlbs = (unsigned int) + (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT); + + /* Write upper watermark to save location */ + tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; + + /* If necessary, set additional pinned TLBs */ + if (pinned_tlbs > 1) + for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { + unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE; + ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); + } + + return total_lowmem; +} diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c new file mode 100644 index 00000000000..b7bcbc232f3 --- /dev/null +++ b/arch/powerpc/mm/4xx_mmu.c @@ -0,0 +1,141 @@ +/* + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mmu_decl.h" + +extern int __map_without_ltlbs; +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + /* + * The Zone Protection Register (ZPR) defines how protection will + * be applied to every page which is a member of a given zone. At + * present, we utilize only two of the 4xx's zones. + * The zone index bits (of ZSEL) in the PTE are used for software + * indicators, except the LSB. For user access, zone 1 is used, + * for kernel access, zone 0 is used. We set all but zone 1 + * to zero, allowing only kernel access as indicated in the PTE. + * For zone 1, we set a 01 binary (a value of 10 will not work) + * to allow user access as indicated in the PTE. This also allows + * kernel access as indicated in the PTE. + */ + + mtspr(SPRN_ZPR, 0x10000000); + + flush_instruction_cache(); + + /* + * Set up the real-mode cache parameters for the exception vector + * handlers (which are run in real-mode). + */ + + mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ + + /* + * Cache instruction and data space where the exception + * vectors and the kernel live in real-mode. + */ + + mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ + mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ +} + +#define LARGE_PAGE_SIZE_16M (1<<24) +#define LARGE_PAGE_SIZE_4M (1<<22) + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned long v, s; + phys_addr_t p; + + v = KERNELBASE; + p = PPC_MEMSTART; + s = 0; + + if (__map_without_ltlbs) { + return s; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_16M; + p += LARGE_PAGE_SIZE_16M; + s += LARGE_PAGE_SIZE_16M; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_4M; + p += LARGE_PAGE_SIZE_4M; + s += LARGE_PAGE_SIZE_4M; + } + + return s; +} diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile new file mode 100644 index 00000000000..9f52c26acd8 --- /dev/null +++ b/arch/powerpc/mm/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ppc-specific parts of the memory manager. 
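+# For example (given the Kconfig rules below), a 32-bit hash-MMU build
+# (CONFIG_PPC32 + CONFIG_PPC_STD_MMU_32) links fault.o mem.o init.o
+# pgtable.o mmu_context.o mem_pieces.o tlb.o ppc_mmu.o hash_32.o,
+# while a 44x build swaps the last two for 44x_mmu.o.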
+# + +obj-y := fault.o mem.o +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ + mem_pieces.o tlb.o +obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_40x) += 4xx_mmu.o +obj-$(CONFIG_44x) += 44x_mmu.o +obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c new file mode 100644 index 00000000000..3df641fa789 --- /dev/null +++ b/arch/powerpc/mm/fault.c @@ -0,0 +1,391 @@ +/* + * arch/ppc/mm/fault.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int __user *)regs->nip)) + return 0; + /* check for 1 in the rA field */ + if (((inst >> 16) & 0x1f) != 1) + return 0; + /* check major opcode */ + switch (inst >> 26) { + case 37: /* stwu */ + case 39: /* stbu */ + case 45: /* sthu */ + case 53: /* stfsu */ + case 55: /* stfdu */ + return 1; + case 62: /* std or stdu */ + return (inst & 3) == 1; + case 31: + /* check minor opcode */ + switch ((inst >> 1) & 0x3ff) { + case 181: /* stdux */ + case 183: /* stwux */ + case 247: /* stbux */ + case 439: /* sthux */ + case 695: /* stfsux */ + case 759: /* stfdux */ + return 1; + } + } + return 0; +} + +static void do_dabr(struct pt_regs *regs, unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)regs->nip; + force_sig_info(SIGTRAP, &info, current); +} + +/* + * For 600- and 800-family processors, the error_code parameter is DSISR + * for a data fault, SRR1 for an instruction fault. For 400-family processors + * the error_code parameter is ESR for a data fault, 0 for an instruction + * fault. + * For 64-bit processors, the error_code parameter is + * - DSISR for a non-SLB data access fault, + * - SRR1 & 0x08000000 for a non-SLB instruction access fault + * - 0 any SLB fault. + * + * The return value is 0 if the fault was handled, or the signal + * number if this is a kernel fault that can't be handled here. 
+ */
+int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+			    unsigned long error_code)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = 0;
+	int trap = TRAP(regs);
+	int is_exec = trap == 0x400;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	/*
+	 * Fortunately the bit assignments in SRR1 for an instruction
+	 * fault and DSISR for a data fault are mostly the same for the
+	 * bits we are interested in.  But there are some bits which
+	 * indicate errors in DSISR but can validly be set in SRR1.
+	 */
+	if (trap == 0x400)
+		error_code &= 0x48200000;
+	else
+		is_write = error_code & DSISR_ISSTORE;
+#else
+	is_write = error_code & ESR_DST;
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
+
+	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
+				11, SIGSEGV) == NOTIFY_STOP)
+		return 0;
+
+	if (trap == 0x300) {
+		if (debugger_fault_handler(regs))
+			return 0;
+	}
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (!user_mode(regs) && (address >= TASK_SIZE))
+		return SIGSEGV;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	if (error_code & DSISR_DABRMATCH) {
+		/* DABR match */
+		do_dabr(regs, error_code);
+		return 0;
+	}
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
+
+	if (in_atomic() || mm == NULL) {
+		if (!user_mode(regs))
+			return SIGSEGV;
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with "
+		       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+		printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
+		       regs->nip, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibility of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->nip))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	/*
+	 * N.B. The POWER/Open ABI allows programs to access up to
+	 * 288 bytes below the stack pointer.
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
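+	 *
+	 * Worked example (hypothetical numbers): with the user r1 at
+	 * 0x30000000, a fault at 0x2ffffe00 is within 2048 bytes of r1
+	 * and is allowed without looking at the instruction, while a
+	 * fault at 0x2ff00000 is allowed only for a stack-update store
+	 * (stwu/stwux with rA = r1) whose effective address, i.e. the
+	 * new stack pointer, is the faulting address.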
+ */ + if (address + 0x100000 < vma->vm_end) { + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->gpr[1] + && (!user_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; +#if defined(CONFIG_6xx) + if (error_code & 0x95700000) + /* an error such as lwarx to I/O controller space, + address matching DABR, eciwx, etc. */ + goto bad_area; +#endif /* CONFIG_6xx */ +#if defined(CONFIG_8xx) + /* The MPC8xx seems to always set 0x80000000, which is + * "undefined". Of those that can be set, this is the only + * one which seems bad. + */ + if (error_code & 0x10000000) + /* Guarded storage error. */ + goto bad_area; +#endif /* CONFIG_8xx */ + + if (is_exec) { +#ifdef CONFIG_PPC64 + /* protection fault */ + if (error_code & DSISR_PROTFAULT) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; +#endif +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + pte_t *ptep; + + /* Since 4xx/Book-E supports per-page execute permission, + * we lazily flush dcache to icache. */ + ptep = NULL; + if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { + struct page *page = pte_page(*ptep); + + if (! test_bit(PG_arch_1, &page->flags)) { + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + pte_update(ptep, 0, _PAGE_HWEXEC); + _tlbie(address); + pte_unmap(ptep); + up_read(&mm->mmap_sem); + return 0; + } + if (ptep != NULL) + pte_unmap(ptep); +#endif + /* a write */ + } else if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return 0; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + _exception(SIGSEGV, regs, code, address); + return 0; + } + + if (is_exec && (error_code & DSISR_PROTFAULT) + && printk_ratelimit()) + printk(KERN_CRIT "kernel tried to execute NX-protected" + " page (%lx) - exploit attempt? (uid: %d)\n", + address, current->uid); + + return SIGSEGV; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. 
+ */ +out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + return SIGKILL; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return 0; + } + return SIGBUS; +} + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from the DSI and ISI handlers in head.S and from some + * of the procedures in traps.c. + */ +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return; + } + + /* kernel has accessed a bad area */ + die("Kernel access of bad area", regs, sig); +} diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c new file mode 100644 index 00000000000..af9ca0eb6d5 --- /dev/null +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -0,0 +1,237 @@ +/* + * Modifications by Kumar Gala (kumar.gala@freescale.com) to support + * E500 Book E processors. + * + * Copyright 2004 Freescale Semiconductor, Inc + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void loadcam_entry(unsigned int index); +unsigned int tlbcam_index; +unsigned int num_tlbcam_entries; +static unsigned long __cam0, __cam1, __cam2; +extern unsigned long total_lowmem; +extern unsigned long __max_low_memory; +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +#define NUM_TLBCAMS (16) + +struct tlbcam { + u32 MAS0; + u32 MAS1; + u32 MAS2; + u32 MAS3; + u32 MAS7; +} TLBCAM[NUM_TLBCAMS]; + +struct tlbcamrange { + unsigned long start; + unsigned long limit; + phys_addr_t phys; +} tlbcam_addrs[NUM_TLBCAMS]; + +extern unsigned int tlbcam_index; + +/* + * Return PA for this VA if it is mapped by a CAM, or 0 + */ +unsigned long v_mapped_by_tlbcam(unsigned long va) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) + return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_tlbcam(unsigned long pa) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (pa >= tlbcam_addrs[b].phys + && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) + +tlbcam_addrs[b].phys) + return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); + return 0; +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 4 between 4k and 256M. + */ +void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid) +{ + unsigned int tsize, lz; + + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + tsize = (21 - lz) / 2; + +#ifdef CONFIG_SMP + if ((flags & _PAGE_NO_CACHE) == 0) + flags |= _PAGE_COHERENT; +#endif + + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); + TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); + TLBCAM[index].MAS2 = virt & PAGE_MASK; + + TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; + + TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) { + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + } +#else + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? 
MAS3_UW : 0); +#endif + + tlbcam_addrs[index].start = virt; + tlbcam_addrs[index].limit = virt + size - 1; + tlbcam_addrs[index].phys = phys; + + loadcam_entry(index); +} + +void invalidate_tlbcam_entry(int index) +{ + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); + TLBCAM[index].MAS1 = ~MAS1_VALID; + + loadcam_entry(index); +} + +void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, + unsigned long cam2) +{ + settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); + tlbcam_index++; + if (cam1) { + tlbcam_index++; + settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); + } + if (cam2) { + tlbcam_index++; + settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); + } +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + cam_mapin_ram(__cam0, __cam1, __cam2); + + return __cam0 + __cam1 + __cam2; +} + + +void __init +adjust_total_lowmem(void) +{ + unsigned long max_low_mem = MAX_LOW_MEM; + unsigned long cam_max = 0x10000000; + unsigned long ram; + + /* adjust CAM size to max_low_mem */ + if (max_low_mem < cam_max) + cam_max = max_low_mem; + + /* adjust lowmem size to max_low_mem */ + if (max_low_mem < total_lowmem) + ram = max_low_mem; + else + ram = total_lowmem; + + /* Calculate CAM values */ + __cam0 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam0 > cam_max) + __cam0 = cam_max; + ram -= __cam0; + if (ram) { + __cam1 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam1 > cam_max) + __cam1 = cam_max; + ram -= __cam1; + } + if (ram) { + __cam2 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam2 > cam_max) + __cam2 = cam_max; + ram -= __cam2; + } + + printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb," + " CAM2=%ldMb residual: %ldMb\n", + __cam0 >> 20, __cam1 >> 20, __cam2 >> 20, + (total_lowmem - __cam0 - __cam1 - __cam2) >> 20); + __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2; +} diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S new file mode 100644 index 00000000000..57278a8dd13 --- /dev/null +++ b/arch/powerpc/mm/hash_32.S @@ -0,0 +1,618 @@ +/* + * arch/ppc/kernel/hashtable.S + * + * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * This file contains low-level assembler routines for managing + * the PowerPC MMU hash table. (PPC 8xx processors don't use a + * hash table, so this file is not used on them.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + .comm mmu_hash_lock,4 +#endif /* CONFIG_SMP */ + +/* + * Sync CPUs with hash_page taking & releasing the hash + * table lock + */ +#ifdef CONFIG_SMP + .text +_GLOBAL(hash_page_sync) + lis r8,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync + eieio + li r0,0 + stw r0,0(r8) + blr +#endif + +/* + * Load a PTE into the hash table, if possible. + * The address is in r4, and r3 contains an access flag: + * _PAGE_RW (0x400) if a write. + * r9 contains the SRR1 value, from which we use the MSR_PR bit. + * SPRG3 contains the physical address of the current task's thread. + * + * Returns to the caller if the access is illegal or there is no + * mapping for the address. Otherwise it places an appropriate PTE + * in the hash table and returns from the exception. + * Uses r0, r3 - r8, ctr, lr. + */ + .text +_GLOBAL(hash_page) +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + MTMSRD(r0) + isync +#endif + tophys(r7,0) /* gets -KERNELBASE into r7 */ +#ifdef CONFIG_SMP + addis r8,r7,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync +#endif + /* Get PTE (linux-style) and check access */ + lis r0,KERNELBASE@h /* check if kernel address */ + cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ + ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ + lwz r5,PGDIR(r8) /* virt page-table root */ + blt+ 112f /* assume user more likely */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +112: add r5,r5,r7 /* convert to phys addr */ + rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ + lwz r8,0(r5) /* get pmd entry */ + rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#ifdef CONFIG_SMP + beq- hash_page_out /* return if no mapping */ +#else + /* XXX it seems like the 601 will give a machine fault on the + rfi if its alignment is wrong (bottom 4 bits of address are + 8 or 0xc) and we have had a not-taken conditional branch + to the address following the rfi. */ + beqlr- +#endif + rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE + + /* + * Update the linux PTE atomically. We do the lwarx up-front + * because almost always, there won't be a permission violation + * and there won't already be an HPTE, and thus we will have + * to update the PTE to set _PAGE_HASHPTE. -- paulus. + */ +retry: + lwarx r6,0,r8 /* get linux-style pte */ + andc. r5,r3,r6 /* check access & ~permission */ +#ifdef CONFIG_SMP + bne- hash_page_out /* return if access not permitted */ +#else + bnelr- +#endif + or r5,r0,r6 /* set accessed/dirty bits */ + stwcx. 
r5,0,r8 /* attempt to update PTE */ + bne- retry /* retry if someone got there first */ + + mfsrin r3,r4 /* get segment reg for segment */ + mfctr r0 + stw r0,_CTR(r11) + bl create_hpte /* add the hash table entry */ + +#ifdef CONFIG_SMP + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) +#endif + + /* Return from the exception */ + lwz r5,_CTR(r11) + mtctr r5 + lwz r0,GPR0(r11) + lwz r7,GPR7(r11) + lwz r8,GPR8(r11) + b fast_exception_return + +#ifdef CONFIG_SMP +hash_page_out: + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) + blr +#endif /* CONFIG_SMP */ + +/* + * Add an entry for a particular page to the hash table. + * + * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) + * + * We assume any necessary modifications to the pte (e.g. setting + * the accessed bit) have already been done and that there is actually + * a hash table in use (i.e. we're not on a 603). + */ +_GLOBAL(add_hash_page) + mflr r0 + stw r0,4(r1) + + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + oris r8,r8,12 +#endif /* CONFIG_SMP */ + + /* + * We disable interrupts here, even on UP, because we don't + * want to race with hash_page, and because we want the + * _PAGE_HASHPTE bit to be a reliable indication of whether + * the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + tophys(r7,0) + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l +10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. + * If _PAGE_HASHPTE was already set, we don't replace the existing + * HPTE, so we just unlock and return. + */ + mr r8,r5 + rlwimi r8,r4,22,20,29 +1: lwarx r6,0,r8 + andi. r0,r6,_PAGE_HASHPTE + bne 9f /* if HASHPTE already set, done */ + ori r5,r6,_PAGE_HASHPTE + stwcx. r5,0,r8 + bne- 1b + + bl create_hpte + +9: +#ifdef CONFIG_SMP + eieio + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + + /* reenable interrupts and DR */ + mtmsr r10 + SYNC_601 + isync + + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This routine adds a hardware PTE to the hash table. + * It is designed to be called with the MMU either on or off. + * r3 contains the VSID, r4 contains the virtual address, + * r5 contains the linux PTE, r6 contains the old value of the + * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the + * offset to be added to addresses (0 if the MMU is on, + * -KERNELBASE if it is off). + * On SMP, the caller should have the mmu_hash_lock held. + * We assume that the caller has (or will) set the _PAGE_HASHPTE + * bit in the linux PTE in memory. The value passed in r6 should + * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set + * this routine will skip the search for an existing HPTE. 
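+ *
+ * In C terms, the primary PTEG address computed below is roughly
+ * (a sketch of the hash only, not the exact bit-level layout):
+ *
+ *	hash = (vsid ^ ((va >> 12) & 0xffff)) & ((1 << Hash_bits) - 1);
+ *	pteg = Hash_base + hash * PTEG_SIZE;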
+ * This procedure modifies r0, r3 - r6, r8, cr0. + * -- paulus. + * + * For speed, 4 of the instructions get patched once the size and + * physical address of the hash table are known. These definitions + * of Hash_base and Hash_bits below are just an example. + */ +Hash_base = 0xc0180000 +Hash_bits = 12 /* e.g. 256kB hash table */ +Hash_msk = (((1 << Hash_bits) - 1) * 64) + +#ifndef CONFIG_PPC64BRIDGE +/* defines for the PTE format for 32-bit PPCs */ +#define PTE_SIZE 8 +#define PTEG_SIZE 64 +#define LG_PTEG_SIZE 6 +#define LDPTEu lwzu +#define STPTE stw +#define CMPPTE cmpw +#define PTE_H 0x40 +#define PTE_V 0x80000000 +#define TST_V(r) rlwinm. r,r,0,0,0 +#define SET_V(r) oris r,r,PTE_V@h +#define CLR_V(r,t) rlwinm r,r,0,1,31 + +#else +/* defines for the PTE format for 64-bit PPCs */ +#define PTE_SIZE 16 +#define PTEG_SIZE 128 +#define LG_PTEG_SIZE 7 +#define LDPTEu ldu +#define STPTE std +#define CMPPTE cmpd +#define PTE_H 2 +#define PTE_V 1 +#define TST_V(r) andi. r,r,PTE_V +#define SET_V(r) ori r,r,PTE_V +#define CLR_V(r,t) li t,PTE_V; andc r,r,t +#endif /* CONFIG_PPC64BRIDGE */ + +#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) +#define HASH_RIGHT 31-LG_PTEG_SIZE + +_GLOBAL(create_hpte) + /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r8,r8,0xe14 /* clear out reserved bits and M */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + + /* Construct the high word of the PPC-style PTE (r5) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r5,r3,12 /* shift vsid into position */ + rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r5) /* set V (valid) bit */ + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(hash_page_patch_A) + addis r0,r7,Hash_base@h /* base address of hash table */ + rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r3,r3,r0 /* make primary hash */ + li r0,8 /* PTEs/group */ + + /* + * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search + * if it is clear, meaning that the HPTE isn't there already... + */ + andi. 
r6,r6,_PAGE_HASHPTE + beq+ 10f /* no PTE: go look for an empty slot */ + tlbie r4 + + addis r4,r7,htab_hash_searches@ha + lwz r6,htab_hash_searches@l(r4) + addi r6,r6,1 /* count how many searches we do */ + stw r6,htab_hash_searches@l(r4) + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + mtctr r0 + addi r4,r3,-PTE_SIZE +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + CMPPTE 0,r6,r5 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_slot + + /* Search the secondary PTEG for a matching PTE */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_B) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + CMPPTE 0,r6,r5 + bdnzf 2,2b + beq+ found_slot + xori r5,r5,PTE_H /* clear H bit again */ + + /* Search the primary PTEG for an empty slot */ +10: mtctr r0 + addi r4,r3,-PTE_SIZE /* search primary PTEG */ +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + TST_V(r6) /* test valid bit */ + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_empty + + /* update counter of times that the primary PTEG is full */ + addis r4,r7,primary_pteg_full@ha + lwz r6,primary_pteg_full@l(r4) + addi r6,r6,1 + stw r6,primary_pteg_full@l(r4) + + /* Search the secondary PTEG for an empty slot */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_C) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + TST_V(r6) + bdnzf 2,2b + beq+ found_empty + xori r5,r5,PTE_H /* clear H bit again */ + + /* + * Choose an arbitrary slot in the primary PTEG to overwrite. + * Since both the primary and secondary PTEGs are full, and we + * have no information that the PTEs in the primary PTEG are + * more important or useful than those in the secondary PTEG, + * and we know there is a definite (although small) speed + * advantage to putting the PTE in the primary PTEG, we always + * put the PTE in the primary PTEG. + */ + addis r4,r7,next_slot@ha + lwz r6,next_slot@l(r4) + addi r6,r6,PTE_SIZE + andi. r6,r6,7*PTE_SIZE + stw r6,next_slot@l(r4) + add r4,r3,r6 + +#ifndef CONFIG_SMP + /* Store PTE in PTEG */ +found_empty: + STPTE r5,0(r4) +found_slot: + STPTE r8,PTE_SIZE/2(r4) + +#else /* CONFIG_SMP */ +/* + * Between the tlbie above and updating the hash table entry below, + * another CPU could read the hash table entry and put it in its TLB. + * There are 3 cases: + * 1. using an empty slot + * 2. updating an earlier entry to change permissions (i.e. enable write) + * 3. taking over the PTE for an unrelated address + * + * In each case it doesn't really matter if the other CPUs have the old + * PTE in their TLB. So we don't need to bother with another tlbie here, + * which is convenient as we've overwritten the register that had the + * address. :-) The tlbie above is mainly to make sure that this CPU comes + * and gets the new PTE from the hash table. + * + * We do however have to make sure that the PTE is never in an invalid + * state with the V bit set. 
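+ *
+ * Concretely, the sequence below is: (1) write the first word with
+ * V clear, so the slot looks invalid; (2) sync/TLBSYNC, then write
+ * the RPN/WIMG/PP word; (3) sync, then rewrite the first word with
+ * V set.  No CPU can therefore see a valid entry paired with a
+ * stale second word.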
+ */ +found_empty: +found_slot: + CLR_V(r5,r0) /* clear V (valid) bit in PTE */ + STPTE r5,0(r4) + sync + TLBSYNC + STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ + sync + SET_V(r5) + STPTE r5,0(r4) /* finally set V bit in PTE */ +#endif /* CONFIG_SMP */ + + sync /* make sure pte updates get to memory */ + blr + + .comm next_slot,4 + .comm primary_pteg_full,4 + .comm htab_hash_searches,4 + +/* + * Flush the entry for a particular page from the hash table. + * + * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, + * int count) + * + * We assume that there is a hash table in use (Hash != 0). + */ +_GLOBAL(flush_hash_pages) + tophys(r7,0) + + /* + * We disable interrupts here, even on UP, because we want + * the _PAGE_HASHPTE bit to be a reliable indication of + * whether the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + /* First find a PTE in the range that has _PAGE_HASHPTE set */ + rlwimi r5,r4,22,20,29 +1: lwz r0,0(r5) + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 2f + ble cr1,19f + addi r4,r4,0x1000 + addi r5,r5,4 + addi r6,r6,-1 + b 1b + + /* Convert context and va to VSID */ +2: mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note code below trims to 24 bits */ + + /* Construct the high word of the PPC-style PTE (r11) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r11,r3,12 /* shift vsid into position */ + rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r11) /* set V (valid) bit */ + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l + rlwinm r8,r1,0,0,18 + add r8,r8,r7 + lwz r8,TI_CPU(r8) + oris r8,r8,9 +10: lwarx r0,0,r9 + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Check the _PAGE_HASHPTE bit in the linux PTE. If it is + * already clear, we're done (for this pte). If not, + * clear it (atomically) and proceed. -- paulus. + */ +33: lwarx r8,0,r5 /* fetch the pte */ + andi. r0,r8,_PAGE_HASHPTE + beq 8f /* done if HASHPTE is already clear */ + rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + stwcx. 
r8,0,r5			/* update the pte */
+	bne-	33b
+
+	/* Get the address of the primary PTE group in the hash table (r3) */
+_GLOBAL(flush_hash_patch_A)
+	addis	r8,r7,Hash_base@h	/* base address of hash table */
+	rlwimi	r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+	rlwinm	r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r8,r0,r8		/* make primary hash */
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	li	r0,8			/* PTEs/group */
+	mtctr	r0
+	addi	r12,r8,-PTE_SIZE
+1:	LDPTEu	r0,PTE_SIZE(r12)	/* get next PTE */
+	CMPPTE	0,r0,r11
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	3f
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r11,r11,PTE_H		/* set H (secondary hash) bit */
+	li	r0,8			/* PTEs/group */
+_GLOBAL(flush_hash_patch_B)
+	xoris	r12,r8,Hash_msk>>16	/* compute secondary hash */
+	xori	r12,r12,(-PTEG_SIZE & 0xffff)
+	addi	r12,r12,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r0,PTE_SIZE(r12)
+	CMPPTE	0,r0,r11
+	bdnzf	2,2b
+	xori	r11,r11,PTE_H		/* clear H again */
+	bne-	4f			/* should rarely fail to find it */
+
+3:	li	r0,0
+	STPTE	r0,0(r12)		/* invalidate entry */
+4:	sync
+	tlbie	r4			/* in hw tlb too */
+	sync
+
+8:	ble	cr1,9f			/* if all ptes checked */
+81:	addi	r6,r6,-1
+	addi	r5,r5,4			/* advance to next pte */
+	addi	r4,r4,0x1000
+	lwz	r0,0(r5)		/* check next pte */
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	33b
+	bgt	cr1,81b
+
+9:
+#ifdef CONFIG_SMP
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+19:	mtmsr	r10
+	SYNC_601
+	isync
+	blr
diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c
new file mode 100644
index 00000000000..f4d983a6e52
--- /dev/null
+++ b/arch/powerpc/mm/init.c
@@ -0,0 +1,581 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *   and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mem_pieces.h"
+#include "mmu_decl.h"
+
+#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
+/* The amount of lowmem must be within 0xF0000000 - KERNELBASE.
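+ * For example, with the usual 32-bit KERNELBASE of 0xc0000000 (the
+ * default; CONFIG_KERNEL_START can move it), this limits
+ * CONFIG_LOWMEM_SIZE to 0x30000000, i.e. 768MB of lowmem.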
*/ +#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) +#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" +#endif +#endif +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +unsigned long total_memory; +unsigned long total_lowmem; + +unsigned long ppc_memstart; +unsigned long ppc_memoffset = PAGE_OFFSET; + +int mem_init_done; +int init_bootmem_done; +int boot_mapsize; +#ifdef CONFIG_PPC_PMAC +unsigned long agp_special_page; +#endif + +extern char _end[]; +extern char etext[], _stext[]; +extern char __init_begin, __init_end; + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + +void MMU_init(void); +void set_phys_avail(unsigned long total_ram); + +/* XXX should be in current.h -- paulus */ +extern struct task_struct *current_set[NR_CPUS]; + +char *klimit = _end; +struct mem_pieces phys_avail; +struct device_node *memory_node; + +/* + * this tells the system to map all of ram with the segregs + * (i.e. page tables) instead of the bats. + * -- Cort + */ +int __map_without_bats; +int __map_without_ltlbs; + +/* max amount of RAM to use */ +unsigned long __max_memory; +/* max amount of low RAM to map in */ +unsigned long __max_low_memory = MAX_LOW_MEM; + +/* + * Read in a property describing some pieces of memory. + */ +static int __init get_mem_prop(char *name, struct mem_pieces *mp) +{ + struct reg_property *rp; + int i, s; + unsigned int *ip; + int nac = prom_n_addr_cells(memory_node); + int nsc = prom_n_size_cells(memory_node); + + ip = (unsigned int *) get_property(memory_node, name, &s); + if (ip == NULL) { + printk(KERN_ERR "error: couldn't get %s property on /memory\n", + name); + return 0; + } + s /= (nsc + nac) * 4; + rp = mp->regions; + for (i = 0; i < s; ++i, ip += nac+nsc) { + if (nac >= 2 && ip[nac-2] != 0) + continue; + rp->address = ip[nac-1]; + if (nsc >= 2 && ip[nac+nsc-2] != 0) + rp->size = ~0U; + else + rp->size = ip[nac+nsc-1]; + ++rp; + } + mp->n_regions = rp - mp->regions; + + /* Make sure the pieces are sorted. */ + mem_pieces_sort(mp); + mem_pieces_coalesce(mp); + return 1; +} + +/* + * Collect information about physical RAM and which pieces are + * already in use from the device tree. + */ +unsigned long __init find_end_of_memory(void) +{ + unsigned long a, total; + struct mem_pieces phys_mem; + + /* + * Find out where physical memory is, and check that it + * starts at 0 and is contiguous. It seems that RAM is + * always physically contiguous on Power Macintoshes. + * + * Supporting discontiguous physical memory isn't hard, + * it just makes the virtual <-> physical mapping functions + * more complicated (or else you end up wasting space + * in mem_map). + */ + memory_node = find_devices("memory"); + if (memory_node == NULL || !get_mem_prop("reg", &phys_mem) + || phys_mem.n_regions == 0) + panic("No RAM??"); + a = phys_mem.regions[0].address; + if (a != 0) + panic("RAM doesn't start at physical address 0"); + total = phys_mem.regions[0].size; + + if (phys_mem.n_regions > 1) { + printk("RAM starting at 0x%x is not contiguous\n", + phys_mem.regions[1].address); + printk("Using RAM from 0 to 0x%lx\n", total-1); + } + + return total; +} + +/* + * Check for command-line options that affect what MMU_init will do. + */ +void MMU_setup(void) +{ + /* Check for nobats option (used in mapin_ram). 
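+	 * For example, booting with "nobats mem=256M" would make
+	 * mapin_ram() use ordinary page tables instead of BATs and,
+	 * via the mem= parsing below, cap usable RAM at 256MB.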
*/ + if (strstr(cmd_line, "nobats")) { + __map_without_bats = 1; + } + + if (strstr(cmd_line, "noltlbs")) { + __map_without_ltlbs = 1; + } + + /* Look for mem= option on command line */ + if (strstr(cmd_line, "mem=")) { + char *p, *q; + unsigned long maxmem = 0; + + for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { + q = p + 4; + if (p > cmd_line && p[-1] != ' ') + continue; + maxmem = simple_strtoul(q, &q, 0); + if (*q == 'k' || *q == 'K') { + maxmem <<= 10; + ++q; + } else if (*q == 'm' || *q == 'M') { + maxmem <<= 20; + ++q; + } + } + __max_memory = maxmem; + } +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ +void __init MMU_init(void) +{ + if (ppc_md.progress) + ppc_md.progress("MMU:enter", 0x111); + + /* parse args from command line */ + MMU_setup(); + + /* + * Figure out how much memory we have, how much + * is lowmem, and how much is highmem. If we were + * passed the total memory size from the bootloader, + * just use it. + */ + if (boot_mem_size) + total_memory = boot_mem_size; + else + total_memory = ppc_md.find_end_of_memory(); + + if (__max_memory && total_memory > __max_memory) + total_memory = __max_memory; + total_lowmem = total_memory; +#ifdef CONFIG_FSL_BOOKE + /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB + * entries, so we need to adjust lowmem to match the amount we can map + * in the fixed entries */ + adjust_total_lowmem(); +#endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { + total_lowmem = __max_low_memory; +#ifndef CONFIG_HIGHMEM + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } + set_phys_avail(total_lowmem); + + /* Initialize the MMU hardware */ + if (ppc_md.progress) + ppc_md.progress("MMU:hw init", 0x300); + MMU_init_hw(); + + /* Map in all of RAM starting at KERNELBASE */ + if (ppc_md.progress) + ppc_md.progress("MMU:mapin", 0x301); + mapin_ram(); + +#ifdef CONFIG_HIGHMEM + ioremap_base = PKMAP_BASE; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Map in I/O resources */ + if (ppc_md.progress) + ppc_md.progress("MMU:setio", 0x302); + if (ppc_md.setup_io_mappings) + ppc_md.setup_io_mappings(); + + /* Initialize the context management stuff */ + mmu_context_init(); + + if (ppc_md.progress) + ppc_md.progress("MMU:exit", 0x211); + +#ifdef CONFIG_BOOTX_TEXT + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + /* Must be done last, or ppc_md.progress will die. */ + map_boot_text(); +#endif +} + +/* This is only called until mem_init is done. 
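+ *
+ * A caller is expected to pick its allocator the same way this
+ * function does; a sketch of the pattern (following what the 32-bit
+ * page-table code does for early PTE pages):
+ *
+ *	if (mem_init_done)
+ *		pte = (pte_t *)__get_free_page(GFP_KERNEL);
+ *	else
+ *		pte = (pte_t *)early_get_page();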
*/ +void __init *early_get_page(void) +{ + void *p; + + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE); + } + return p; +} + +/* Free up now-unused memory */ +static void free_sec(unsigned long start, unsigned long end, const char *name) +{ + unsigned long cnt = 0; + + while (start < end) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + cnt++; + start += PAGE_SIZE; + } + if (cnt) { + printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); + totalram_pages += cnt; + } +} + +void free_initmem(void) +{ +#define FREESEC(TYPE) \ + free_sec((unsigned long)(&__ ## TYPE ## _begin), \ + (unsigned long)(&__ ## TYPE ## _end), \ + #TYPE); + + printk ("Freeing unused kernel memory:"); + FREESEC(init); + printk("\n"); + ppc_md.progress = NULL; +#undef FREESEC +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +/* + * Initialize the bootmem system and give it all the memory we + * have available. + */ +void __init do_init_bootmem(void) +{ + unsigned long start, size; + int i; + + /* + * Find an area to use for the bootmem bitmap. + * We look for the first area which is at least + * 128kB in length (128kB is enough for a bitmap + * for 4GB of memory, using 4kB pages), plus 1 page + * (in case the address isn't page-aligned). + */ + start = 0; + size = 0; + for (i = 0; i < phys_avail.n_regions; ++i) { + unsigned long a = phys_avail.regions[i].address; + unsigned long s = phys_avail.regions[i].size; + if (s <= size) + continue; + start = a; + size = s; + if (s >= 33 * PAGE_SIZE) + break; + } + start = PAGE_ALIGN(start); + + min_low_pfn = start >> PAGE_SHIFT; + max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT; + max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT; + boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn, + PPC_MEMSTART >> PAGE_SHIFT, + max_low_pfn); + + /* remove the bootmem bitmap from the available memory */ + mem_pieces_remove(&phys_avail, start, boot_mapsize, 1); + + /* add everything in phys_avail into the bootmem map */ + for (i = 0; i < phys_avail.n_regions; ++i) + free_bootmem(phys_avail.regions[i].address, + phys_avail.regions[i].size); + + init_bootmem_done = 1; +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES], i; + +#ifdef CONFIG_HIGHMEM + map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k + (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); + map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ + kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k + (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); + kmap_prot = PAGE_KERNEL; +#endif /* CONFIG_HIGHMEM */ + + /* + * All pages are DMA-able so we put them all in the DMA zone. 
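+	 * For example (hypothetical size), 512MB of lowmem with 4kB
+	 * pages gives zones_size[ZONE_DMA] = 0x20000000 >> 12 = 131072
+	 * pages; with CONFIG_HIGHMEM, everything above total_lowmem
+	 * goes to ZONE_HIGHMEM instead.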
+ */ + zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + for (i = 1; i < MAX_NR_ZONES; i++) + zones_size[i] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + + free_area_init(zones_size); +} + +void __init mem_init(void) +{ + unsigned long addr; + int codepages = 0; + int datapages = 0; + int initpages = 0; +#ifdef CONFIG_HIGHMEM + unsigned long highmem_mapnr; + + highmem_mapnr = total_lowmem >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + max_mapnr = total_memory >> PAGE_SHIFT; + + high_memory = (void *) __va(PPC_MEMSTART + total_lowmem); + num_physpages = max_mapnr; /* RAM is assumed contiguous */ + + totalram_pages += free_all_bootmem(); + +#ifdef CONFIG_BLK_DEV_INITRD + /* if we are booted from BootX with an initial ramdisk, + make sure the ramdisk pages aren't reserved. */ + if (initrd_start) { + for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE) + ClearPageReserved(virt_to_page(addr)); + } +#endif /* CONFIG_BLK_DEV_INITRD */ + +#ifdef CONFIG_PPC_OF + /* mark the RTAS pages as reserved */ + if ( rtas_data ) + for (addr = (ulong)__va(rtas_data); + addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ; + addr += PAGE_SIZE) + SetPageReserved(virt_to_page(addr)); +#endif +#ifdef CONFIG_PPC_PMAC + if (agp_special_page) + SetPageReserved(virt_to_page(agp_special_page)); +#endif + for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory; + addr += PAGE_SIZE) { + if (!PageReserved(virt_to_page(addr))) + continue; + if (addr < (ulong) etext) + codepages++; + else if (addr >= (unsigned long)&__init_begin + && addr < (unsigned long)&__init_end) + initpages++; + else if (addr < (ulong) klimit) + datapages++; + } + +#ifdef CONFIG_HIGHMEM + { + unsigned long pfn; + + for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { + struct page *page = mem_map + pfn; + + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; + } +#endif /* CONFIG_HIGHMEM */ + + printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", + (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), + codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), + initpages<< (PAGE_SHIFT-10), + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + +#ifdef CONFIG_PPC_PMAC + if (agp_special_page) + printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page); +#endif + + mem_init_done = 1; +} + +/* + * Set phys_avail to the amount of physical memory, + * less the kernel text/data/bss. + */ +void __init +set_phys_avail(unsigned long total_memory) +{ + unsigned long kstart, ksize; + + /* + * Initially, available physical memory is equivalent to all + * physical memory. + */ + + phys_avail.regions[0].address = PPC_MEMSTART; + phys_avail.regions[0].size = total_memory; + phys_avail.n_regions = 1; + + /* + * Map out the kernel text/data/bss from the available physical + * memory. + */ + + kstart = __pa(_stext); /* should be 0 */ + ksize = PAGE_ALIGN(klimit - _stext); + + mem_pieces_remove(&phys_avail, kstart, ksize, 0); + mem_pieces_remove(&phys_avail, 0, 0x4000, 0); + +#if defined(CONFIG_BLK_DEV_INITRD) + /* Remove the init RAM disk from the available memory. 
*/ + if (initrd_start) { + mem_pieces_remove(&phys_avail, __pa(initrd_start), + initrd_end - initrd_start, 1); + } +#endif /* CONFIG_BLK_DEV_INITRD */ +#ifdef CONFIG_PPC_OF + /* remove the RTAS pages from the available memory */ + if (rtas_data) + mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1); +#endif +#ifdef CONFIG_PPC_PMAC + /* Because of some uninorth weirdness, we need a page of + * memory as high as possible (it must be outside of the + * bus address seen as the AGP aperture). It will be used + * by the r128 DRM driver + * + * FIXME: We need to make sure that page doesn't overlap any of the\ + * above. This could be done by improving mem_pieces_find to be able + * to do a backward search from the end of the list. + */ + if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) { + agp_special_page = (total_memory - PAGE_SIZE); + mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0); + agp_special_page = (unsigned long)__va(agp_special_page); + } +#endif /* CONFIG_PPC_PMAC */ +} + +/* Mark some memory as reserved by removing it from phys_avail. */ +void __init reserve_phys_mem(unsigned long start, unsigned long size) +{ + mem_pieces_remove(&phys_avail, start, size, 1); +} diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c new file mode 100644 index 00000000000..81f6745b31e --- /dev/null +++ b/arch/powerpc/mm/init64.c @@ -0,0 +1,385 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... 
please, keep seated,
+	 * unfortunately, we are out of oxygen masks at the moment.
+	 * So we need some rough way to tell where your big IO hole
+	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
+	 * that area as well, on POWER4 we don't have one, etc...
+	 * We need that as a "hint" when sizing the TCE table on POWER3.
+	 * So far, the simplest way that seems to work well enough for us
+	 * is to just assume that the first discontinuity in our physical
+	 * RAM layout is the IO hole. That may not be correct in the future
+	 * (and isn't on iSeries but then we don't care ;)
+	 */
+
+#ifndef CONFIG_PPC_ISERIES
+	for (i = 1; i < lmb.memory.cnt; i++) {
+		unsigned long base, prevbase, prevsize;
+
+		prevbase = lmb.memory.region[i-1].base;
+		prevsize = lmb.memory.region[i-1].size;
+		base = lmb.memory.region[i].base;
+		if (base > (prevbase + prevsize)) {
+			io_hole_start = prevbase + prevsize;
+			io_hole_size = base - (prevbase + prevsize);
+			break;
+		}
+	}
+#endif /* CONFIG_PPC_ISERIES */
+	if (io_hole_start)
+		printk("IO Hole assumed to be %lx -> %lx\n",
+		       io_hole_start, io_hole_start + io_hole_size - 1);
+
+	ppc64_boot_msg(0x100, "MM Init Done");
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)__init_begin;
+	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk ("Freeing unused kernel memory: %luk freed\n",
+		((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+void __init do_init_bootmem(void)
+{
+	unsigned long i;
+	unsigned long start, bootmap_pages;
+	unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	int boot_mapsize;
+
+	/*
+	 * Find an area to use for the bootmem bitmap.  Calculate the size of
+	 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
+	 * Add 1 additional page in case the address isn't page-aligned.
+	 */
+	bootmap_pages = bootmem_bootmap_pages(total_pages);
+
+	start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
+	BUG_ON(!start);
+
+	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+
+	max_pfn = max_low_pfn;
+
+	/* Add all physical memory to the bootmem map, mark each area
+	 * present.
+	 */
+	for (i=0; i < lmb.memory.cnt; i++)
+		free_bootmem(lmb.memory.region[i].base,
+			     lmb_size_bytes(&lmb.memory, i));
+
+	/* reserve the sections we're already using */
+	for (i=0; i < lmb.reserved.cnt; i++)
+		reserve_bootmem(lmb.reserved.region[i].base,
+				lmb_size_bytes(&lmb.reserved, i));
+
+	for (i=0; i < lmb.memory.cnt; i++)
+		memory_present(0, lmb_start_pfn(&lmb.memory, i),
+			       lmb_end_pfn(&lmb.memory, i));
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
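The free-then-reserve order in do_init_bootmem() above is only safe because every lmb reserved range sits inside some lmb memory range; otherwise reserve_bootmem() would touch bitmap bits that were never added. A sketch of that invariant (hypothetical helper, not part of the patch):

	static int __init lmb_range_is_covered(unsigned long base,
					       unsigned long size)
	{
		int i;

		for (i = 0; i < lmb.memory.cnt; i++) {
			unsigned long mbase = lmb.memory.region[i].base;
			unsigned long msize = lmb.memory.region[i].size;

			if (base >= mbase && base + size <= mbase + msize)
				return 1;	/* fully inside region i */
		}
		return 0;
	}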
+ */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = lmb_end_of_DRAM(); + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + /* + * All pages are DMA-able so we put them all in the DMA zone. + */ + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; + + free_area_init_node(0, NODE_DATA(0), zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); +} +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ + +static struct kcore_list kcore_vmem; + +static int __init setup_kcore(void) +{ + int i; + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base, size; + struct kcore_list *kcore_mem; + + base = lmb.memory.region[i].base; + size = lmb.memory.region[i].size; + + /* GFP_ATOMIC to avoid might_sleep warnings during boot */ + kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); + if (!kcore_mem) + panic("mem_init: kmalloc failed\n"); + + kclist_add(kcore_mem, __va(base), size); + } + + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + + return 0; +} +module_init(setup_kcore); + +void __init mem_init(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int nid; +#endif + pg_data_t *pgdat; + unsigned long i; + struct page *page; + unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; + + num_physpages = max_low_pfn; /* RAM is assumed contiguous */ + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages != 0) { + printk("freeing bootmem node %x\n", nid); + totalram_pages += + free_all_bootmem_node(NODE_DATA(nid)); + } + } +#else + max_mapnr = num_physpages; + totalram_pages += free_all_bootmem(); +#endif + + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + if (PageReserved(page)) + reservedpages++; + } + } + + codesize = (unsigned long)&_etext - (unsigned long)&_stext; + initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + datasize = (unsigned long)&_edata - (unsigned long)&__init_end; + bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " + "%luk reserved, %luk data, %luk bss, %luk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + bsssize >> 10, + initsize >> 10); + + mem_init_done = 1; + + /* Initialize the vDSO */ + vdso_init(); +} + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + memset(addr, 0, kmem_cache_size(cache)); +} + +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t 
*pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + +void pgtable_cache_init(void) +{ + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c new file mode 100644 index 00000000000..345db08e5d2 --- /dev/null +++ b/arch/powerpc/mm/mem.c @@ -0,0 +1,299 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
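For orientation, the caches created by pgtable_cache_init() above are consumed by the pgalloc layer; roughly like this (illustrative sketch only, the real allocators live in the asm headers):

	static inline pmd_t *sketch_pmd_alloc(void)
	{
		/* zero_ctor() already cleared the table, so no memset here */
		return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
					GFP_KERNEL);
	}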
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mem_pieces.h" +#include "mmu_decl.h" + +#ifndef CPU_FTR_COHERENT_ICACHE +#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ +#define CPU_FTR_NOEXECUTE 0 +#endif + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + unsigned long paddr = (pfn << PAGE_SHIFT); + +#ifndef CONFIG_PPC64 /* XXX for now */ + return paddr < __pa(high_memory); +#else + int i; + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + + base = lmb.memory.region[i].base; + + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +#endif +} +EXPORT_SYMBOL(page_is_ram); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + unsigned long highmem = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageHighMem(page)) + highmem++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); +#ifdef CONFIG_HIGHMEM + printk("%ld pages of HIGHMEM\n", highmem); +#endif + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. 
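Two reference points for the functions below (both illustrative, not part of the patch): the PG_arch_1 handshake they all share, and a rough C rendering of the per-page flush that __flush_dcache_icache() (implemented in assembler elsewhere) performs, assuming L1_CACHE_BYTES cache lines:

	/*
	 * PG_arch_1 set   => the page's icache image is known clean
	 * PG_arch_1 clear => kernel stores may have left the icache stale
	 *
	 * flush_dcache_page():  kernel dirtied the page -> clear the bit
	 * update_mmu_cache():   page may be executed    -> flush, set bit
	 */
	static void sketch_flush_dcache_icache(void *page_va)
	{
		unsigned long off;

		for (off = 0; off < PAGE_SIZE; off += L1_CACHE_BYTES)
			asm volatile("dcbst 0,%0" : :
				     "r" ((char *)page_va + off) : "memory");
		asm volatile("sync");		/* stores reach memory */
		for (off = 0; off < PAGE_SIZE; off += L1_CACHE_BYTES)
			asm volatile("icbi 0,%0" : :
				     "r" ((char *)page_va + off) : "memory");
		asm volatile("sync; isync");	/* discard stale prefetch */
	}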
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &page->flags))
+		clear_bit(PG_arch_1, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void flush_dcache_icache_page(struct page *page)
+{
+#ifdef CONFIG_BOOKE
+	void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
+	__flush_dcache_icache(start);
+	kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+#elif defined(CONFIG_8xx)
+	/* On 8xx there is no need to kmap since highmem is not supported */
+	__flush_dcache_icache(page_address(page));
+#else
+	__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+#endif
+}
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/*
+	 * We shouldn't have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	unsigned long maddr;
+
+	maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(maddr, maddr + len);
+	kunmap(page);
+}
+EXPORT_SYMBOL(flush_icache_user_range);
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the mm->page_table_lock held
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t pte)
+{
+	/* handle i-cache coherency */
+	unsigned long pfn = pte_pfn(pte);
+#ifdef CONFIG_PPC32
+	pmd_t *pmd;
+#else
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+	int local = 0;
+	cpumask_t tmp;
+	unsigned long flags;
+#endif
+
+	/* handle i-cache coherency */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    pfn_valid(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (!PageReserved(page)
+		    && !test_bit(PG_arch_1, &page->flags)) {
+			if (vma->vm_mm == current->active_mm) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly
+			 * "dcbst" from flush_dcache_icache) fault as write
+			 * operation if there is an unpopulated TLB entry
+			 * for the address in question. To work around that,
+			 * we invalidate the TLB here, thus avoiding dcbst
+			 * misbehaviour.
+			 */
+			_tlbie(address);
+#endif
+			__flush_dcache_icache((void *) address);
+		} else
+			flush_dcache_icache_page(page);
+		set_bit(PG_arch_1, &page->flags);
+	}
+
+#ifdef CONFIG_PPC_STD_MMU
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte) || address >= TASK_SIZE)
+		return;
+#ifdef CONFIG_PPC32
+	if (Hash == 0)
+		return;
+	pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
+	if (!pmd_none(*pmd))
+		add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
+#else
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, address);
+	if (!ptep)
+		return;
+
+	vsid = get_vsid(vma->vm_mm->context.id, address);
+
+	local_irq_save(flags);
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	__hash_page(address, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
+		    0x300, local);
+	local_irq_restore(flags);
+#endif
+#endif
+}
diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c
new file mode 100644
index 00000000000..ef765a84433
--- /dev/null
+++ b/arch/powerpc/mm/mem64.c
@@ -0,0 +1,259 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * Dave Engebretsen
+ *	Rework for PPC64 port.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + int i; + unsigned long paddr = (pfn << PAGE_SHIFT); + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + + base = lmb.memory.region[i].base; + + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +} +EXPORT_SYMBOL(page_is_ram); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &page->flags)) + clear_bit(PG_arch_1, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* + * We shouldnt have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. 
+ * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); +} +EXPORT_SYMBOL(flush_icache_user_range); + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the mm->page_table_lock held + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) +{ + unsigned long vsid; + void *pgdir; + pte_t *ptep; + int local = 0; + cpumask_t tmp; + unsigned long flags; + + /* handle i-cache coherency */ + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + !cpu_has_feature(CPU_FTR_NOEXECUTE)) { + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page) + && !test_bit(PG_arch_1, &page->flags)) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } + } + } + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte)) + return; + + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + + vsid = get_vsid(vma->vm_mm->context.id, ea); + + local_irq_save(flags); + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) + local = 1; + + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + 0x300, local); + local_irq_restore(flags); +} diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c new file mode 100644 index 00000000000..3d639052017 --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 1996 Paul Mackerras + * Changes to accommodate Power Macintoshes. + * Cort Dougan + * Rewrites. + * Grant Erickson + * General rework and split from mm/init.c. + * + * Module name: mem_pieces.c + * + * Description: + * Routines and data structures for manipulating and representing + * phyiscal memory extents (i.e. address/length pairs). + * + */ + +#include +#include +#include +#include +#include + +#include "mem_pieces.h" + +extern struct mem_pieces phys_avail; + +static void mem_pieces_print(struct mem_pieces *); + +/* + * Scan a region for a piece of a given size with the required alignment. 
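mem_pieces_find() below rounds each candidate address up with the standard power-of-two idiom; with concrete numbers (illustrative):

	/*
	 *	a = (a + align - 1) & -align;	(align a power of 2)
	 *
	 * e.g. a = 0x3001, align = 0x1000:
	 *	0x3001 + 0x0fff     = 0x4000
	 *	0x4000 & 0xfffff000 = 0x4000	(rounded up to the boundary)
	 */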
+ */ +void __init * +mem_pieces_find(unsigned int size, unsigned int align) +{ + int i; + unsigned a, e; + struct mem_pieces *mp = &phys_avail; + + for (i = 0; i < mp->n_regions; ++i) { + a = mp->regions[i].address; + e = a + mp->regions[i].size; + a = (a + align - 1) & -align; + if (a + size <= e) { + mem_pieces_remove(mp, a, size, 1); + return (void *) __va(a); + } + } + panic("Couldn't find %u bytes at %u alignment\n", size, align); + + return NULL; +} + +/* + * Remove some memory from an array of pieces + */ +void __init +mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size, + int must_exist) +{ + int i, j; + unsigned int end, rs, re; + struct reg_property *rp; + + end = start + size; + for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) { + if (end > rp->address && start < rp->address + rp->size) + break; + } + if (i >= mp->n_regions) { + if (must_exist) + printk("mem_pieces_remove: [%x,%x) not in any region\n", + start, end); + return; + } + for (; i < mp->n_regions && end > rp->address; ++i, ++rp) { + rs = rp->address; + re = rs + rp->size; + if (must_exist && (start < rs || end > re)) { + printk("mem_pieces_remove: bad overlap [%x,%x) with", + start, end); + mem_pieces_print(mp); + must_exist = 0; + } + if (start > rs) { + rp->size = start - rs; + if (end < re) { + /* need to split this entry */ + if (mp->n_regions >= MEM_PIECES_MAX) + panic("eek... mem_pieces overflow"); + for (j = mp->n_regions; j > i + 1; --j) + mp->regions[j] = mp->regions[j-1]; + ++mp->n_regions; + rp[1].address = end; + rp[1].size = re - end; + } + } else { + if (end < re) { + rp->address = end; + rp->size = re - end; + } else { + /* need to delete this entry */ + for (j = i; j < mp->n_regions - 1; ++j) + mp->regions[j] = mp->regions[j+1]; + --mp->n_regions; + --i; + --rp; + } + } + } +} + +static void __init +mem_pieces_print(struct mem_pieces *mp) +{ + int i; + + for (i = 0; i < mp->n_regions; ++i) + printk(" [%x, %x)", mp->regions[i].address, + mp->regions[i].address + mp->regions[i].size); + printk("\n"); +} + +void __init +mem_pieces_sort(struct mem_pieces *mp) +{ + unsigned long a, s; + int i, j; + + for (i = 1; i < mp->n_regions; ++i) { + a = mp->regions[i].address; + s = mp->regions[i].size; + for (j = i - 1; j >= 0; --j) { + if (a >= mp->regions[j].address) + break; + mp->regions[j+1] = mp->regions[j]; + } + mp->regions[j+1].address = a; + mp->regions[j+1].size = s; + } +} + +void __init +mem_pieces_coalesce(struct mem_pieces *mp) +{ + unsigned long a, s, ns; + int i, j, d; + + d = 0; + for (i = 0; i < mp->n_regions; i = j) { + a = mp->regions[i].address; + s = mp->regions[i].size; + for (j = i + 1; j < mp->n_regions + && mp->regions[j].address - a <= s; ++j) { + ns = mp->regions[j].address + mp->regions[j].size - a; + if (ns > s) + s = ns; + } + mp->regions[d].address = a; + mp->regions[d].size = s; + ++d; + } + mp->n_regions = d; +} diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h new file mode 100644 index 00000000000..e2b700dc7f1 --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1996 Paul Mackerras + * Changes to accommodate Power Macintoshes. + * Cort Dougan + * Rewrites. + * Grant Erickson + * General rework and split from mm/init.c. + * + * Module name: mem_pieces.h + * + * Description: + * Routines and data structures for manipulating and representing + * phyiscal memory extents (i.e. address/length pairs). 
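How removal behaves at the region level (an illustrative walk-through of mem_pieces_remove() above, not part of the patch):

	/*
	 * before:	phys_avail = { [0x000000, 0x800000) }
	 *
	 *	mem_pieces_remove(&phys_avail, 0x400000, 0x100000, 1);
	 *
	 * after:	phys_avail = { [0x000000, 0x400000),
	 *			       [0x500000, 0x800000) }
	 *
	 * Carving a hole out of the middle splits one region into two,
	 * which is why the code has to guard against MEM_PIECES_MAX.
	 */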
+ * + */ + +#ifndef __MEM_PIECES_H__ +#define __MEM_PIECES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Type Definitions */ + +#define MEM_PIECES_MAX 32 + +struct mem_pieces { + int n_regions; + struct reg_property regions[MEM_PIECES_MAX]; +}; + +/* Function Prototypes */ + +extern void *mem_pieces_find(unsigned int size, unsigned int align); +extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start, + unsigned int size, int must_exist); +extern void mem_pieces_coalesce(struct mem_pieces *mp); +extern void mem_pieces_sort(struct mem_pieces *mp); + +#ifdef __cplusplus +} +#endif + +#endif /* __MEM_PIECES_H__ */ diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c new file mode 100644 index 00000000000..a8816e0f6a8 --- /dev/null +++ b/arch/powerpc/mm/mmu_context.c @@ -0,0 +1,86 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +#ifdef FEW_CONTEXTS +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; +void steal_context(void); +#endif /* FEW_CONTEXTS */ + +/* + * Initialize the context management stuff. + */ +void __init +mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +#ifdef FEW_CONTEXTS + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +#endif /* FEW_CONTEXTS */ +} + +#ifdef FEW_CONTEXTS +/* + * Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +void +steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... 
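The fast-path allocator that consumes context_map and next_mmu_context lives in the mmu_context.h header rather than here; in outline it behaves something like this (rough sketch only, assuming LAST_CONTEXT is 2^n - 1; the FEW_CONTEXTS variant additionally throttles on nr_free_contexts and falls back to steal_context()):

	static inline mm_context_t sketch_get_context(void)
	{
		mm_context_t ctx = next_mmu_context;

		/* claim the first free context at or after next_mmu_context */
		while (test_and_set_bit(ctx, context_map)) {
			ctx = find_next_zero_bit(context_map,
						 LAST_CONTEXT + 1, ctx);
			if (ctx > LAST_CONTEXT)
				ctx = FIRST_CONTEXT;
		}
		next_mmu_context = (ctx + 1) & LAST_CONTEXT;
		return ctx;
	}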
*/ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} +#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c new file mode 100644 index 00000000000..714a84dd8d5 --- /dev/null +++ b/arch/powerpc/mm/mmu_context64.c @@ -0,0 +1,63 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h new file mode 100644 index 00000000000..540f3292b22 --- /dev/null +++ b/arch/powerpc/mm/mmu_decl.h @@ -0,0 +1,85 @@ +/* + * Declarations of procedures and variables shared between files + * in arch/ppc/mm/. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include +#include + +extern void mapin_ram(void); +extern int map_page(unsigned long va, phys_addr_t pa, int flags); +extern void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); +extern void reserve_phys_mem(unsigned long start, unsigned long size); +extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid); +extern void invalidate_tlbcam_entry(int index); + +extern int __map_without_bats; +extern unsigned long ioremap_base; +extern unsigned long ioremap_bot; +extern unsigned int rtas_data, rtas_size; + +extern unsigned long total_memory; +extern unsigned long total_lowmem; +extern int mem_init_done; + +extern PTE *Hash, *Hash_end; +extern unsigned long Hash_size, Hash_mask; + +extern unsigned int num_tlbcam_entries; + +/* ...and now those things that may be slightly different between processor + * architectures. 
-- Dan + */ +#if defined(CONFIG_8xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +#define MMU_init_hw() do { } while(0) +#define mmu_mapin_ram() (0UL) + +#elif defined(CONFIG_4xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +#elif defined(CONFIG_FSL_BOOKE) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); +extern void adjust_total_lowmem(void); + +#else +/* anything except 4xx or 8xx */ +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +/* Be careful....this needs to be updated if we ever encounter 603 SMPs, + * which includes all new 82xx processors. We need tlbie/tlbsync here + * in that case (I think). -- Dan. + */ +static inline void flush_HPTE(unsigned context, unsigned long va, + unsigned long pdval) +{ + if ((Hash != 0) && + cpu_has_feature(CPU_FTR_HPTE_TABLE)) + flush_hash_pages(0, va, pdval, 1); + else + _tlbie(va); +} +#endif diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c new file mode 100644 index 00000000000..81a3d7446d3 --- /dev/null +++ b/arch/powerpc/mm/pgtable.c @@ -0,0 +1,470 @@ +/* + * This file contains the routines setting up the linux page tables. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mmu_decl.h" + +unsigned long ioremap_base; +unsigned long ioremap_bot; +int io_bat_index; + +#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#define HAVE_BATS 1 +#endif + +#if defined(CONFIG_FSL_BOOKE) +#define HAVE_TLBCAM 1 +#endif + +extern char etext[], _stext[]; + +#ifdef CONFIG_SMP +extern void hash_page_sync(void); +#endif + +#ifdef HAVE_BATS +extern unsigned long v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(unsigned long pa); +void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); + +#else /* !HAVE_BATS */ +#define v_mapped_by_bats(x) (0UL) +#define p_mapped_by_bats(x) (0UL) +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM +extern unsigned int tlbcam_index; +extern unsigned long v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +#else /* !HAVE_TLBCAM */ +#define v_mapped_by_tlbcam(x) (0UL) +#define p_mapped_by_tlbcam(x) (0UL) +#endif /* HAVE_TLBCAM */ + +#ifdef CONFIG_PTE_64BIT +/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. 
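Back-of-envelope for that 8kB figure (illustrative):

	/*
	 *	4kB PTE page / 8-byte PTEs	= 512 PTEs per page
	 *	512 * 4kB			= 2MB per pgd entry
	 *	4GB address space / 2MB		= 2048 pgd entries
	 *	2048 * 4-byte pointers		= 8kB, i.e. two pages
	 */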
*/ +#define PGDIR_ORDER 1 +#else +#define PGDIR_ORDER 0 +#endif + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + return ret; +} + +void pgd_free(pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGDIR_ORDER); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +void pte_free_kernel(pte_t *pte) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + free_page((unsigned long)pte); +} + +void pte_free(struct page *ptepage) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + __free_page(ptepage); +} + +#ifndef CONFIG_PHYS_64BIT +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +#else /* CONFIG_PHYS_64BIT */ +void __iomem * +ioremap64(unsigned long long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + phys_addr_t addr64 = fixup_bigphys_addr(addr, size); + + return ioremap64(addr64, size); +} +#endif /* CONFIG_PHYS_64BIT */ + +void __iomem * +__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16*1024*1024) + p += _ISA_MEM_BASE; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if ( mem_init_done && (p < virt_to_phys(high_memory)) ) + { + printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * BAT mapping. If the whole area is mapped then we're done, + * otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). 
+ * -- Cort + */ + if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) + goto out; + + if ((v = p_mapped_by_tlbcam(p))) + goto out; + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == 0) + return NULL; + v = (unsigned long) area->addr; + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v+i, p+i, flags); + if (err) { + if (mem_init_done) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. + * Calling vfree() generates a benign warning. + */ + if (v_mapped_by_bats((unsigned long)addr)) return; + + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vunmap((void *) (PAGE_MASK & (unsigned long)addr)); +} + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +int +map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + + spin_lock(&init_mm.page_table_lock); + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(&init_mm, pd, va); + if (pg != 0) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + } + spin_unlock(&init_mm.page_table_lock); + return err; +} + +/* + * Map in all of physical memory starting at KERNELBASE. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + s = mmu_mapin_ram(); + v = KERNELBASE + s; + p = PPC_MEMSTART + s; + for (; s < total_lowmem; s += PAGE_SIZE) { + if ((char *) v >= _stext && (char *) v < etext) + f = _PAGE_RAM_TEXT; + else + f = _PAGE_RAM; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* is x a power of 4? */ +#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > KERNELBASE && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + +#ifdef HAVE_BATS + /* + * Use a BAT for this if possible... + */ + if (io_bat_index < 2 && is_power_of_2(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + setbat(io_bat_index, virt, phys, size, flags); + ++io_bat_index; + return; + } +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM + /* + * Use a CAM for this if possible... + */ + if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + settlbcam(tlbcam_index, virt, phys, size, flags, 0); + ++tlbcam_index; + return; + } +#endif /* HAVE_TLBCAM */ + + /* No BATs available, put it in the page tables. 
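Why the "ffs(x) & 1" term in is_power_of_4() above does the job (illustrative): for a power of two, ffs() returns the 1-based position of the single set bit, and powers of 4 have that bit in an odd position.

	/*
	 *	0x100000 (1MB):  ffs() == 21, odd  -> 4^10, a power of 4
	 *	0x200000 (2MB):  ffs() == 22, even -> not a power of 4
	 *
	 * TLBCAM entries only come in power-of-4 sizes, hence the
	 * stricter test on that path.
	 */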
*/ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +int +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } + return(retval); +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + /* I don't know why this won't work on PMacs or CHRP. It + * appears there is some bug, or there is some implicit + * mapping done not properly represented by BATs or in page + * tables.......I am actively working on resolving this, but + * can't hold up other stuff. -- Dan + */ + pte_t *pte; + struct mm_struct *mm; + + /* Check the BATs */ + pa = v_mapped_by_bats(addr); + if (pa) + return pa; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) { + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + pte_unmap(pte); + } + + return(pa); +} + +/* This is will find the virtual address for a physical one.... + * Swiped from APUS, could be dangerous :-). + * This is only a placeholder until I really find a way to make this + * work. -- Dan + */ +unsigned long +mm_ptov (unsigned long paddr) +{ + unsigned long ret; +#if 0 + if (paddr < 16*1024*1024) + ret = ZTWO_VADDR(paddr); + else { + int i; + + for (i = 0; i < kmap_chunk_count;){ + unsigned long phys = kmap_chunks[i++]; + unsigned long size = kmap_chunks[i++]; + unsigned long virt = kmap_chunks[i++]; + if (paddr >= phys + && paddr < (phys + size)){ + ret = virt + paddr - phys; + goto exit; + } + } + + ret = (unsigned long) __va(paddr); + } +exit: +#ifdef DEBUGPV + printk ("PTOV(%lx)=%lx\n", paddr, ret); +#endif +#else + ret = (unsigned long)paddr + KERNELBASE; +#endif + return ret; +} + diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c new file mode 100644 index 00000000000..724f97e5dee --- /dev/null +++ b/arch/powerpc/mm/pgtable64.c @@ -0,0 +1,357 @@ +/* + * This file contains ioremap and related functions for 64-bit machines. + * + * Derived from arch/ppc64/mm/init.c + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static int map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + if (!ptep) + return -ENOMEM; + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + spin_unlock(&init_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { + panic("map_io_page: could not insert mapping"); + } + } + return 0; +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + + for (i = 0; i < size; i += PAGE_SIZE) + if (map_io_page(ea+i, pa+i, flags)) + return NULL; + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + unsigned long pa, ea; + void __iomem *ret; + + /* + * Choose an address to map it to. 
+ * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); + } else { + ea = ioremap_bot; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; + } + return ret; +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + void __iomem *ret; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from " + "im_get_area\n"); + return 1; + } + } + + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!mem_init_done) + return; + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + im_free(addr); +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! 
This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c new file mode 100644 index 00000000000..9a381ed5eb2 --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu.c @@ -0,0 +1,296 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mmu_decl.h" +#include "mem_pieces.h" + +PTE *Hash, *Hash_end; +unsigned long Hash_size, Hash_mask; +unsigned long _SDR1; + +union ubat { /* BAT register values to be loaded */ + BAT bat; +#ifdef CONFIG_PPC64BRIDGE + u64 word[2]; +#else + u32 word[2]; +#endif +} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ + +struct batrange { /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} bat_addrs[4]; + +/* + * Return PA for this VA if it is mapped by a BAT, or 0 + */ +unsigned long v_mapped_by_bats(unsigned long va) +{ + int b; + for (b = 0; b < 4; ++b) + if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) + return bat_addrs[b].phys + (va - bat_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_bats(unsigned long pa) +{ + int b; + for (b = 0; b < 4; ++b) + if (pa >= bat_addrs[b].phys + && pa < (bat_addrs[b].limit-bat_addrs[b].start) + +bat_addrs[b].phys) + return bat_addrs[b].start+(pa-bat_addrs[b].phys); + return 0; +} + +unsigned long __init mmu_mapin_ram(void) +{ +#ifdef CONFIG_POWER4 + return 0; +#else + unsigned long tot, bl, done; + unsigned long max_size = (256<<20); + unsigned long align; + + if (__map_without_bats) + return 0; + + /* Set up BAT2 and if necessary BAT3 to cover RAM. */ + + /* Make sure we don't map a block larger than the + smallest alignment of the physical address. 
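+	   (For example, a hypothetical PPC_MEMSTART of 0x04000000 has
+	   its lowest set bit at 0x04000000, so BAT blocks would be
+	   capped at 64MB.)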
*/ + /* alignment of PPC_MEMSTART */ + align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; + /* set BAT block size to MIN(max_size, align) */ + if (align && align < max_size) + max_size = align; + + tot = total_lowmem; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > tot) + break; + } + + setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + if ((done < tot) && !bat_addrs[3].limit) { + /* use BAT3 to cover a bit more */ + tot -= done; + for (bl = 128<<10; bl < max_size; bl <<= 1) + if (bl * 2 > tot) + break; + setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + } + + return done; +#endif +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + */ +void __init setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned int bl; + int wimgxpp; + union ubat *bat = BATS[index]; + + if (((flags & _PAGE_NO_CACHE) == 0) && + cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags |= _PAGE_COHERENT; + + bl = (size >> 17) - 1; + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, etc. */ + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].word[1] = phys | wimgxpp; +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) +#endif + bat[1].bat.batu.vp = 1; + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + bat[0].word[0] = bat[0].word[1] = 0; + } else { + /* make IBAT same as DBAT */ + bat[0] = bat[1]; + } + } else { + /* 601 cpu */ + if (bl > BL_8M) + bl = BL_8M; + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT); + wimgxpp |= (flags & _PAGE_RW)? + ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; + bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->word[1] = phys | bl | 0x40; /* V=1 */ + } + + bat_addrs[index].start = virt; + bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[index].phys = phys; +} + +/* + * Initialize the hash table and patch the instructions in hashtable.S. + */ +void __init MMU_init_hw(void) +{ + unsigned int hmask, mb, mb2; + unsigned int n_hpteg, lg_n_hpteg; + + extern unsigned int hash_page_patch_A[]; + extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; + extern unsigned int hash_page[]; + extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; + + if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + /* + * Put a blr (procedure return) instruction at the + * start of hash_page, since we can still get DSI + * exceptions on a 603. 
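+		 * (0x4e800020 is the instruction encoding of blr.)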
+ */ + hash_page[0] = 0x4e800020; + flush_icache_range((unsigned long) &hash_page[0], + (unsigned long) &hash_page[1]); + return; + } + + if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); + +#ifdef CONFIG_PPC64BRIDGE +#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ +#define SDR1_LOW_BITS (lg_n_hpteg - 11) +#define MIN_N_HPTEG 2048 /* min 256kB hash table */ +#else +#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ +#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) +#define MIN_N_HPTEG 1024 /* min 64kB hash table */ +#endif + +#ifdef CONFIG_POWER4 + /* The hash table has already been allocated and initialized + in prom.c */ + n_hpteg = Hash_size >> LG_HPTEG_SIZE; + lg_n_hpteg = __ilog2(n_hpteg); + + /* Remove the hash table from the available memory */ + if (Hash) + reserve_phys_mem(__pa(Hash), Hash_size); + +#else /* CONFIG_POWER4 */ + /* + * Allow 1 HPTE (1/8 HPTEG) for each page of memory. + * This is less than the recommended amount, but then + * Linux ain't AIX. + */ + n_hpteg = total_memory / (PAGE_SIZE * 8); + if (n_hpteg < MIN_N_HPTEG) + n_hpteg = MIN_N_HPTEG; + lg_n_hpteg = __ilog2(n_hpteg); + if (n_hpteg & (n_hpteg - 1)) { + ++lg_n_hpteg; /* round up if not power of 2 */ + n_hpteg = 1 << lg_n_hpteg; + } + Hash_size = n_hpteg << LG_HPTEG_SIZE; + + /* + * Find some memory for the hash table. + */ + if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); + Hash = mem_pieces_find(Hash_size, Hash_size); + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; +#endif /* CONFIG_POWER4 */ + + Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); + + + /* + * Patch up the instructions in hashtable.S:create_hpte + */ + if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); + Hash_mask = n_hpteg - 1; + hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + mb2 = 16 - LG_HPTEG_SIZE; + + hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); + hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); + hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; + hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; + + /* + * Ensure that the locations we've patched have been written + * out from the data cache and invalidated in the instruction + * cache, on those machines with split caches. + */ + flush_icache_range((unsigned long) &hash_page_patch_A[0], + (unsigned long) &hash_page_patch_C[1]); + + /* + * Patch up the instructions in hashtable.S:flush_hash_page + */ + flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); + flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); + flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; + flush_icache_range((unsigned long) &flush_hash_patch_A[0], + (unsigned long) &flush_hash_patch_B[1]); + + if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); +} diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c new file mode 100644 index 00000000000..6c3dc3c44c8 --- /dev/null +++ b/arch/powerpc/mm/tlb.c @@ -0,0 +1,183 @@ +/* + * This file contains the routines for TLB flushing. 
+ * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. + */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context, addr, ptephys, 1); + } +} + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pgd_offset(mm, start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. 
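+ * With a hash table this walks the mm's vm_area list and flushes
+ * the hash entries for each mapped range; without one (e.g. on
+ * the 603) it simply flushes the whole TLB.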
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *mp;
+
+	if (Hash == 0) {
+		_tlbia();
+		return;
+	}
+
+	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+		flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
+	FINISH_FLUSH;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	if (Hash == 0) {
+		_tlbie(vmaddr);
+		return;
+	}
+	mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
+	pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+	if (!pmd_none(*pmd))
+		flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1);
+	FINISH_FLUSH;
+}
+
+/*
+ * For each address in the range, find the pte for the address
+ * and check _PAGE_HASHPTE bit; if it is set, find and destroy
+ * the corresponding HPTE.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	flush_range(vma->vm_mm, start, end);
+	FINISH_FLUSH;
+}
-- 
cgit v1.2.3-70-g09d2

From 20c8c2106305729e7d5e06f6c3d390e965a3dd34 Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Wed, 28 Sep 2005 20:28:14 +1000
Subject: powerpc: Fixes to get the merged kernel to boot on powermac.

This merges ppc_ksyms.c, puts back the actual do_execve call in
sys_execve, makes MMU_init call find_end_of_memory rather than
ppc_md.find_end_of_memory (every platform has a device tree with a
/memory node now, right?) and fixes some problems with the mpic
initialization on newworld powermacs.

Signed-off-by: Paul Mackerras
---
 arch/powerpc/kernel/Makefile               |   2 +-
 arch/powerpc/kernel/ppc_ksyms.c            | 338 +++++++++++++++++++++++++++++
 arch/powerpc/kernel/process.c              |   2 +
 arch/powerpc/mm/init.c                     |   2 +-
 arch/powerpc/platforms/powermac/pmac_pic.c |  17 ++
 arch/ppc/kernel/Makefile                   |   2 +-
 6 files changed, 360 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/kernel/ppc_ksyms.c
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 62c4a51a23d..58c130b10ec 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -14,5 +14,5 @@ extra-$(CONFIG_PPC_FPU) += fpu.o
 extra-y += vmlinux.lds
 
 obj-y := semaphore.o traps.o process.o
-
+obj-$(CONFIG_MODULES) += ppc_ksyms.o
 obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
new file mode 100644
index 00000000000..7bfa0f0121f
--- /dev/null
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -0,0 +1,338 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_8xx
+#include
+#endif
+
+extern void transfer_to_handler(void);
+extern void do_IRQ(struct pt_regs *regs);
+extern void MachineCheckException(struct pt_regs *regs);
+extern void AlignmentException(struct pt_regs *regs);
+extern void ProgramCheckException(struct pt_regs *regs);
+extern void SingleStepException(struct pt_regs *regs);
+extern int do_signal(sigset_t *, struct pt_regs *);
+extern int pmac_newworld;
+extern int sys_sigreturn(struct pt_regs *regs);
+
+long long __ashrdi3(long long, int);
+long long __ashldi3(long long, int);
+long long __lshrdi3(long long, int);
+
+extern unsigned long
mm_ptov (unsigned long paddr); + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(clear_user_page); +EXPORT_SYMBOL(do_signal); +EXPORT_SYMBOL(transfer_to_handler); +EXPORT_SYMBOL(do_IRQ); +EXPORT_SYMBOL(MachineCheckException); +EXPORT_SYMBOL(AlignmentException); +EXPORT_SYMBOL(ProgramCheckException); +EXPORT_SYMBOL(SingleStepException); +EXPORT_SYMBOL(sys_sigreturn); +EXPORT_SYMBOL(ppc_n_lost_interrupts); +EXPORT_SYMBOL(ppc_lost_interrupts); + +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); +#if defined(CONFIG_PPC_PREP) +EXPORT_SYMBOL(_prep_type); +EXPORT_SYMBOL(ucSystemType); +#endif + +#if !defined(__INLINE_BITOPS) +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(change_bit); +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(test_and_change_bit); +#endif /* __INLINE_BITOPS */ + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strncat); +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(strstr); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strcasecmp); +EXPORT_SYMBOL(__div64_32); + +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +EXPORT_SYMBOL(_insb); +EXPORT_SYMBOL(_outsb); +EXPORT_SYMBOL(_insw); +EXPORT_SYMBOL(_outsw); +EXPORT_SYMBOL(_insl); +EXPORT_SYMBOL(_outsl); +EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(iopa); +EXPORT_SYMBOL(mm_ptov); +EXPORT_SYMBOL(ioremap); +#ifdef CONFIG_44x +EXPORT_SYMBOL(ioremap64); +#endif +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); +EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +EXPORT_SYMBOL(ppc_ide_md); +#endif + +#ifdef CONFIG_PCI +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_bus_io_base); +EXPORT_SYMBOL(pci_bus_io_base_phys); +EXPORT_SYMBOL(pci_bus_mem_base_phys); +EXPORT_SYMBOL(pci_bus_to_hose); +EXPORT_SYMBOL(pci_resource_to_bus); +EXPORT_SYMBOL(pci_phys_to_bus); +EXPORT_SYMBOL(pci_bus_to_phys); +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_NOT_COHERENT_CACHE +EXPORT_SYMBOL(flush_dcache_all); +#endif + +EXPORT_SYMBOL(start_thread); +EXPORT_SYMBOL(kernel_thread); + +EXPORT_SYMBOL(flush_instruction_cache); +EXPORT_SYMBOL(giveup_fpu); +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__flush_icache_range); +#else +EXPORT_SYMBOL(flush_icache_range); +#endif +EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_user_range); +EXPORT_SYMBOL(flush_dcache_page); +EXPORT_SYMBOL(flush_tlb_kernel_range); +EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(_tlbie); +#ifdef CONFIG_ALTIVEC +EXPORT_SYMBOL(last_task_used_altivec); +EXPORT_SYMBOL(giveup_altivec); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE +EXPORT_SYMBOL(last_task_used_spe); +EXPORT_SYMBOL(giveup_spe); +#endif /* CONFIG_SPE */ +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_call_function); +EXPORT_SYMBOL(smp_hw_index); +#endif + +EXPORT_SYMBOL(ppc_md); + +#ifdef CONFIG_ADB +EXPORT_SYMBOL(adb_request); +EXPORT_SYMBOL(adb_register); +EXPORT_SYMBOL(adb_unregister); 
+EXPORT_SYMBOL(adb_poll); +EXPORT_SYMBOL(adb_try_handler_change); +#endif /* CONFIG_ADB */ +#ifdef CONFIG_ADB_CUDA +EXPORT_SYMBOL(cuda_request); +EXPORT_SYMBOL(cuda_poll); +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_PPC_MULTIPLATFORM +EXPORT_SYMBOL(_machine); +#endif +#ifdef CONFIG_PPC_PMAC +EXPORT_SYMBOL(sys_ctrler); +EXPORT_SYMBOL(pmac_newworld); +#endif +#ifdef CONFIG_PPC_OF +EXPORT_SYMBOL(find_devices); +EXPORT_SYMBOL(find_type_devices); +EXPORT_SYMBOL(find_compatible_devices); +EXPORT_SYMBOL(find_path_device); +EXPORT_SYMBOL(device_is_compatible); +EXPORT_SYMBOL(machine_is_compatible); +EXPORT_SYMBOL(find_all_nodes); +EXPORT_SYMBOL(get_property); +EXPORT_SYMBOL(request_OF_resource); +EXPORT_SYMBOL(release_OF_resource); +EXPORT_SYMBOL(pci_busdev_to_OF_node); +EXPORT_SYMBOL(pci_device_to_OF_node); +EXPORT_SYMBOL(pci_device_from_OF_node); +EXPORT_SYMBOL(of_find_node_by_name); +EXPORT_SYMBOL(of_find_node_by_type); +EXPORT_SYMBOL(of_find_compatible_node); +EXPORT_SYMBOL(of_find_node_by_path); +EXPORT_SYMBOL(of_find_all_nodes); +EXPORT_SYMBOL(of_get_parent); +EXPORT_SYMBOL(of_get_next_child); +EXPORT_SYMBOL(of_node_get); +EXPORT_SYMBOL(of_node_put); +#endif /* CONFIG_PPC_OF */ +#if defined(CONFIG_BOOTX_TEXT) +EXPORT_SYMBOL(btext_update_display); +#endif +#if defined(CONFIG_SCSI) && defined(CONFIG_PPC_PMAC) +EXPORT_SYMBOL(note_scsi_host); +#endif +#ifdef CONFIG_VT +EXPORT_SYMBOL(kd_mksound); +#endif +EXPORT_SYMBOL(to_tm); + +EXPORT_SYMBOL(pm_power_off); + +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); + +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(irq_desc); +EXPORT_SYMBOL(tb_ticks_per_jiffy); +EXPORT_SYMBOL(get_wchan); +EXPORT_SYMBOL(console_drivers); + +#ifdef CONFIG_PPC_ISERIES +EXPORT_SYMBOL(local_irq_disable); +EXPORT_SYMBOL(local_irq_enable); +EXPORT_SYMBOL(local_get_flags); +#endif + +#ifdef CONFIG_XMON +EXPORT_SYMBOL(xmon); +EXPORT_SYMBOL(xmon_printf); +#endif +EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__down_interruptible); + +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) +extern void (*debugger)(struct pt_regs *regs); +extern int (*debugger_bpt)(struct pt_regs *regs); +extern int (*debugger_sstep)(struct pt_regs *regs); +extern int (*debugger_iabr_match)(struct pt_regs *regs); +extern int (*debugger_dabr_match)(struct pt_regs *regs); +extern void (*debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(debugger); +EXPORT_SYMBOL(debugger_bpt); +EXPORT_SYMBOL(debugger_sstep); +EXPORT_SYMBOL(debugger_iabr_match); +EXPORT_SYMBOL(debugger_dabr_match); +EXPORT_SYMBOL(debugger_fault_handler); +#endif + +#ifdef CONFIG_8xx +EXPORT_SYMBOL(cpm_install_handler); +EXPORT_SYMBOL(cpm_free_handler); +#endif /* CONFIG_8xx */ +#if defined(CONFIG_8xx) || defined(CONFIG_40x) || defined(CONFIG_85xx) ||\ + defined(CONFIG_83xx) +EXPORT_SYMBOL(__res); +#endif + +EXPORT_SYMBOL(next_mmu_context); +EXPORT_SYMBOL(set_context); +EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */ +EXPORT_SYMBOL(disarm_decr); +#ifdef CONFIG_PPC_STD_MMU +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP 
*/ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU */ +EXPORT_SYMBOL(cur_cpu_spec); +#ifdef CONFIG_PPC_PMAC +extern unsigned long agp_special_page; +EXPORT_SYMBOL(agp_special_page); +#endif +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f5a9d2a84fa..e3946769dd8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -601,6 +601,8 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); + error = do_execve(filename, (char __user * __user *) a1, + (char __user * __user *) a2, regs); if (error == 0) { task_lock(current); current->ptrace &= ~PT_DTRACE; diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c index f4d983a6e52..3a81ef15c67 100644 --- a/arch/powerpc/mm/init.c +++ b/arch/powerpc/mm/init.c @@ -239,7 +239,7 @@ void __init MMU_init(void) if (boot_mem_size) total_memory = boot_mem_size; else - total_memory = ppc_md.find_end_of_memory(); + total_memory = find_end_of_memory(); if (__max_memory && total_memory > __max_memory) total_memory = __max_memory; diff --git a/arch/powerpc/platforms/powermac/pmac_pic.c b/arch/powerpc/platforms/powermac/pmac_pic.c index bf3e1899a4c..a6b1b577e19 100644 --- a/arch/powerpc/platforms/powermac/pmac_pic.c +++ b/arch/powerpc/platforms/powermac/pmac_pic.c @@ -424,6 +424,8 @@ void __init pmac_pic_init(void) printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n", (unsigned int)irqctrler->addrs[0].address); + ppc_md.get_irq = mpic_get_irq; + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler, 0, 0); prom_get_irq_senses(senses, 0, 128); mpic1 = mpic_alloc(irqctrler->addrs[0].address, @@ -452,7 +454,22 @@ void __init pmac_pic_init(void) mpic_setup_cascade(irqctrler2->intrs[0].line, pmac_u3_cascade, mpic2); } +#ifdef CONFIG_XMON + { + struct device_node* pswitch; + int nmi_irq; + + pswitch = find_devices("programmer-switch"); + if (pswitch && pswitch->n_intrs) { + nmi_irq = pswitch->intrs[0].line; + openpic_init_nmi_irq(nmi_irq); + setup_irq(nmi_irq, &xmon_action); + } + } +#endif /* CONFIG_XMON */ + return; } + irqctrler = NULL; /* Get the level/edge settings, assume if it's not * a Grand Central nor an OHare, then it's an Heathrow diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index 0649540bc7d..467f9648035 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -47,7 +47,7 @@ obj-y := entry.o irq.o idle.o time.o misc.o \ obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_POWER4) += cpu_setup_power4.o -obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o +obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_KGDB) += ppc-stub.o -- cgit v1.2.3-70-g09d2 From 7c8c6b9776fb41134d87ef50706a777a45d61cd4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 6 Oct 2005 12:23:33 +1000 Subject: powerpc: Merge lmb.c and make MM initialization use it. This also creates merged versions of do_init_bootmem, paging_init and mem_init and moves them to arch/powerpc/mm/mem.c. It gets rid of the mem_pieces stuff. I made memory_limit a parameter to lmb_enforce_memory_limit rather than a global referenced by that function. 
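As a rough sketch only (the caller below is hypothetical and the
region size is invented; the lmb_* calls are the ones added by this
patch), the merged interface is meant to be driven like this at
early boot:

	/* hypothetical caller; the real ones live in platform setup code */
	static unsigned long __init lmb_setup_sketch(unsigned long memory_limit)
	{
		lmb_init();			/* empty memory/reserved lists */

		/* one lmb_add() per "reg" entry of the /memory node */
		lmb_add(0x00000000, 0x20000000);	/* say, 512MB at 0 */

		/* keep later allocations away from the kernel image */
		lmb_reserve(__pa(_stext), _end - _stext);

		/* apply any memory limit, then recompute lmb.memory.size */
		lmb_enforce_memory_limit(memory_limit);
		lmb_analyze();

		return lmb_end_of_DRAM();	/* total_memory in MMU_init() */
	}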
This will require some small changes to ppc64 if we want to continue building ARCH=ppc64 using the merged lmb.c. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 4 +- arch/powerpc/mm/init.c | 365 +++---------------------------------------- arch/powerpc/mm/init64.c | 126 --------------- arch/powerpc/mm/lmb.c | 296 +++++++++++++++++++++++++++++++++++ arch/powerpc/mm/mem.c | 185 +++++++++++++++++++++- arch/powerpc/mm/mem_pieces.c | 163 ------------------- arch/powerpc/mm/mem_pieces.h | 48 ------ arch/powerpc/mm/mmu_decl.h | 2 + arch/powerpc/mm/pgtable.c | 3 +- arch/powerpc/mm/ppc_mmu.c | 17 +- include/asm-ppc/page.h | 18 ++- 11 files changed, 520 insertions(+), 707 deletions(-) create mode 100644 arch/powerpc/mm/lmb.c delete mode 100644 arch/powerpc/mm/mem_pieces.c delete mode 100644 arch/powerpc/mm/mem_pieces.h (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 9f52c26acd8..afd3be112b7 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -2,9 +2,9 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -obj-y := fault.o mem.o +obj-y := fault.o mem.o lmb.o obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - mem_pieces.o tlb.o + tlb.o obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o obj-$(CONFIG_40x) += 4xx_mmu.o diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c index 3a81ef15c67..bf13c14e66b 100644 --- a/arch/powerpc/mm/init.c +++ b/arch/powerpc/mm/init.c @@ -45,8 +45,9 @@ #include #include #include +#include +#include -#include "mem_pieces.h" #include "mmu_decl.h" #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) @@ -65,17 +66,11 @@ unsigned long total_lowmem; unsigned long ppc_memstart; unsigned long ppc_memoffset = PAGE_OFFSET; -int mem_init_done; -int init_bootmem_done; int boot_mapsize; #ifdef CONFIG_PPC_PMAC unsigned long agp_special_page; #endif -extern char _end[]; -extern char etext[], _stext[]; -extern char __init_begin, __init_end; - #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -85,15 +80,15 @@ EXPORT_SYMBOL(kmap_pte); #endif void MMU_init(void); -void set_phys_avail(unsigned long total_ram); /* XXX should be in current.h -- paulus */ extern struct task_struct *current_set[NR_CPUS]; char *klimit = _end; -struct mem_pieces phys_avail; struct device_node *memory_node; +extern int init_bootmem_done; + /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. @@ -102,84 +97,14 @@ struct device_node *memory_node; int __map_without_bats; int __map_without_ltlbs; -/* max amount of RAM to use */ -unsigned long __max_memory; /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; /* - * Read in a property describing some pieces of memory. + * limit of what is accessible with initial MMU setup - + * 256MB usually, but only 16MB on 601. 
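+ * (MMU_init() below drops this to 0x01000000 when the PVR
+ * identifies a 601, and early_get_page() uses it as the ceiling
+ * for lmb_alloc_base().)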
*/ -static int __init get_mem_prop(char *name, struct mem_pieces *mp) -{ - struct reg_property *rp; - int i, s; - unsigned int *ip; - int nac = prom_n_addr_cells(memory_node); - int nsc = prom_n_size_cells(memory_node); - - ip = (unsigned int *) get_property(memory_node, name, &s); - if (ip == NULL) { - printk(KERN_ERR "error: couldn't get %s property on /memory\n", - name); - return 0; - } - s /= (nsc + nac) * 4; - rp = mp->regions; - for (i = 0; i < s; ++i, ip += nac+nsc) { - if (nac >= 2 && ip[nac-2] != 0) - continue; - rp->address = ip[nac-1]; - if (nsc >= 2 && ip[nac+nsc-2] != 0) - rp->size = ~0U; - else - rp->size = ip[nac+nsc-1]; - ++rp; - } - mp->n_regions = rp - mp->regions; - - /* Make sure the pieces are sorted. */ - mem_pieces_sort(mp); - mem_pieces_coalesce(mp); - return 1; -} - -/* - * Collect information about physical RAM and which pieces are - * already in use from the device tree. - */ -unsigned long __init find_end_of_memory(void) -{ - unsigned long a, total; - struct mem_pieces phys_mem; - - /* - * Find out where physical memory is, and check that it - * starts at 0 and is contiguous. It seems that RAM is - * always physically contiguous on Power Macintoshes. - * - * Supporting discontiguous physical memory isn't hard, - * it just makes the virtual <-> physical mapping functions - * more complicated (or else you end up wasting space - * in mem_map). - */ - memory_node = find_devices("memory"); - if (memory_node == NULL || !get_mem_prop("reg", &phys_mem) - || phys_mem.n_regions == 0) - panic("No RAM??"); - a = phys_mem.regions[0].address; - if (a != 0) - panic("RAM doesn't start at physical address 0"); - total = phys_mem.regions[0].size; - - if (phys_mem.n_regions > 1) { - printk("RAM starting at 0x%x is not contiguous\n", - phys_mem.regions[1].address); - printk("Using RAM from 0 to 0x%lx\n", total-1); - } - - return total; -} +unsigned long __initial_memory_limit = 0x10000000; /* * Check for command-line options that affect what MMU_init will do. @@ -194,27 +119,6 @@ void MMU_setup(void) if (strstr(cmd_line, "noltlbs")) { __map_without_ltlbs = 1; } - - /* Look for mem= option on command line */ - if (strstr(cmd_line, "mem=")) { - char *p, *q; - unsigned long maxmem = 0; - - for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { - q = p + 4; - if (p > cmd_line && p[-1] != ' ') - continue; - maxmem = simple_strtoul(q, &q, 0); - if (*q == 'k' || *q == 'K') { - maxmem <<= 10; - ++q; - } else if (*q == 'm' || *q == 'M') { - maxmem <<= 20; - ++q; - } - } - __max_memory = maxmem; - } } /* @@ -227,23 +131,22 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + __initial_memory_limit = 0x01000000; + /* parse args from command line */ MMU_setup(); - /* - * Figure out how much memory we have, how much - * is lowmem, and how much is highmem. If we were - * passed the total memory size from the bootloader, - * just use it. 
- */ - if (boot_mem_size) - total_memory = boot_mem_size; - else - total_memory = find_end_of_memory(); - - if (__max_memory && total_memory > __max_memory) - total_memory = __max_memory; + if (lmb.memory.cnt > 1) { + lmb.memory.cnt = 1; + lmb_analyze(); + printk(KERN_WARNING "Only using first contiguous memory region"); + } + + total_memory = lmb_end_of_DRAM(); total_lowmem = total_memory; + #ifdef CONFIG_FSL_BOOKE /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB * entries, so we need to adjust lowmem to match the amount we can map @@ -256,7 +159,6 @@ void __init MMU_init(void) total_memory = total_lowmem; #endif /* CONFIG_HIGHMEM */ } - set_phys_avail(total_lowmem); /* Initialize the MMU hardware */ if (ppc_md.progress) @@ -303,7 +205,8 @@ void __init *early_get_page(void) if (init_bootmem_done) { p = alloc_bootmem_pages(PAGE_SIZE); } else { - p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE); + p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + __initial_memory_limit)); } return p; } @@ -353,229 +256,3 @@ void free_initrd_mem(unsigned long start, unsigned long end) } } #endif - -/* - * Initialize the bootmem system and give it all the memory we - * have available. - */ -void __init do_init_bootmem(void) -{ - unsigned long start, size; - int i; - - /* - * Find an area to use for the bootmem bitmap. - * We look for the first area which is at least - * 128kB in length (128kB is enough for a bitmap - * for 4GB of memory, using 4kB pages), plus 1 page - * (in case the address isn't page-aligned). - */ - start = 0; - size = 0; - for (i = 0; i < phys_avail.n_regions; ++i) { - unsigned long a = phys_avail.regions[i].address; - unsigned long s = phys_avail.regions[i].size; - if (s <= size) - continue; - start = a; - size = s; - if (s >= 33 * PAGE_SIZE) - break; - } - start = PAGE_ALIGN(start); - - min_low_pfn = start >> PAGE_SHIFT; - max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT; - max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn, - PPC_MEMSTART >> PAGE_SHIFT, - max_low_pfn); - - /* remove the bootmem bitmap from the available memory */ - mem_pieces_remove(&phys_avail, start, boot_mapsize, 1); - - /* add everything in phys_avail into the bootmem map */ - for (i = 0; i < phys_avail.n_regions; ++i) - free_bootmem(phys_avail.regions[i].address, - phys_avail.regions[i].size); - - init_bootmem_done = 1; -} - -/* - * paging_init() sets up the page tables - in fact we've already done this. - */ -void __init paging_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES], i; - -#ifdef CONFIG_HIGHMEM - map_page(PKMAP_BASE, 0, 0); /* XXX gross */ - pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k - (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); - map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ - kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k - (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); - kmap_prot = PAGE_KERNEL; -#endif /* CONFIG_HIGHMEM */ - - /* - * All pages are DMA-able so we put them all in the DMA zone. 
- */ - zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; - for (i = 1; i < MAX_NR_ZONES; i++) - zones_size[i] = 0; - -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; -#endif /* CONFIG_HIGHMEM */ - - free_area_init(zones_size); -} - -void __init mem_init(void) -{ - unsigned long addr; - int codepages = 0; - int datapages = 0; - int initpages = 0; -#ifdef CONFIG_HIGHMEM - unsigned long highmem_mapnr; - - highmem_mapnr = total_lowmem >> PAGE_SHIFT; -#endif /* CONFIG_HIGHMEM */ - max_mapnr = total_memory >> PAGE_SHIFT; - - high_memory = (void *) __va(PPC_MEMSTART + total_lowmem); - num_physpages = max_mapnr; /* RAM is assumed contiguous */ - - totalram_pages += free_all_bootmem(); - -#ifdef CONFIG_BLK_DEV_INITRD - /* if we are booted from BootX with an initial ramdisk, - make sure the ramdisk pages aren't reserved. */ - if (initrd_start) { - for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE) - ClearPageReserved(virt_to_page(addr)); - } -#endif /* CONFIG_BLK_DEV_INITRD */ - -#ifdef CONFIG_PPC_OF - /* mark the RTAS pages as reserved */ - if ( rtas_data ) - for (addr = (ulong)__va(rtas_data); - addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ; - addr += PAGE_SIZE) - SetPageReserved(virt_to_page(addr)); -#endif -#ifdef CONFIG_PPC_PMAC - if (agp_special_page) - SetPageReserved(virt_to_page(agp_special_page)); -#endif - for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory; - addr += PAGE_SIZE) { - if (!PageReserved(virt_to_page(addr))) - continue; - if (addr < (ulong) etext) - codepages++; - else if (addr >= (unsigned long)&__init_begin - && addr < (unsigned long)&__init_end) - initpages++; - else if (addr < (ulong) klimit) - datapages++; - } - -#ifdef CONFIG_HIGHMEM - { - unsigned long pfn; - - for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { - struct page *page = mem_map + pfn; - - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; - } - totalram_pages += totalhigh_pages; - } -#endif /* CONFIG_HIGHMEM */ - - printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", - (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), - codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), - initpages<< (PAGE_SHIFT-10), - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); - -#ifdef CONFIG_PPC_PMAC - if (agp_special_page) - printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page); -#endif - - mem_init_done = 1; -} - -/* - * Set phys_avail to the amount of physical memory, - * less the kernel text/data/bss. - */ -void __init -set_phys_avail(unsigned long total_memory) -{ - unsigned long kstart, ksize; - - /* - * Initially, available physical memory is equivalent to all - * physical memory. - */ - - phys_avail.regions[0].address = PPC_MEMSTART; - phys_avail.regions[0].size = total_memory; - phys_avail.n_regions = 1; - - /* - * Map out the kernel text/data/bss from the available physical - * memory. - */ - - kstart = __pa(_stext); /* should be 0 */ - ksize = PAGE_ALIGN(klimit - _stext); - - mem_pieces_remove(&phys_avail, kstart, ksize, 0); - mem_pieces_remove(&phys_avail, 0, 0x4000, 0); - -#if defined(CONFIG_BLK_DEV_INITRD) - /* Remove the init RAM disk from the available memory. 
*/ - if (initrd_start) { - mem_pieces_remove(&phys_avail, __pa(initrd_start), - initrd_end - initrd_start, 1); - } -#endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_PPC_OF - /* remove the RTAS pages from the available memory */ - if (rtas_data) - mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1); -#endif -#ifdef CONFIG_PPC_PMAC - /* Because of some uninorth weirdness, we need a page of - * memory as high as possible (it must be outside of the - * bus address seen as the AGP aperture). It will be used - * by the r128 DRM driver - * - * FIXME: We need to make sure that page doesn't overlap any of the\ - * above. This could be done by improving mem_pieces_find to be able - * to do a backward search from the end of the list. - */ - if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) { - agp_special_page = (total_memory - PAGE_SIZE); - mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0); - agp_special_page = (unsigned long)__va(agp_special_page); - } -#endif /* CONFIG_PPC_PMAC */ -} - -/* Mark some memory as reserved by removing it from phys_avail. */ -void __init reserve_phys_mem(unsigned long start, unsigned long size) -{ - mem_pieces_remove(&phys_avail, start, size, 1); -} diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c index 81f6745b31e..c0ce6a7af3c 100644 --- a/arch/powerpc/mm/init64.c +++ b/arch/powerpc/mm/init64.c @@ -166,77 +166,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -/* - * Initialize the bootmem system and give it all the memory we - * have available. - */ -#ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) -{ - unsigned long i; - unsigned long start, bootmap_pages; - unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; - int boot_mapsize; - - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = lmb_alloc(bootmap_pages<> PAGE_SHIFT, total_pages); - - max_pfn = max_low_pfn; - - /* Add all physical memory to the bootmem map, mark each area - * present. - */ - for (i=0; i < lmb.memory.cnt; i++) - free_bootmem(lmb.memory.region[i].base, - lmb_size_bytes(&lmb.memory, i)); - - /* reserve the sections we're already using */ - for (i=0; i < lmb.reserved.cnt; i++) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i)); - - for (i=0; i < lmb.memory.cnt; i++) - memory_present(0, lmb_start_pfn(&lmb.memory, i), - lmb_end_pfn(&lmb.memory, i)); -} - -/* - * paging_init() sets up the page tables - in fact we've already done this. - */ -void __init paging_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; - unsigned long total_ram = lmb_phys_mem_size(); - unsigned long top_of_ram = lmb_end_of_DRAM(); - - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); - /* - * All pages are DMA-able so we put them all in the DMA zone. - */ - memset(zones_size, 0, sizeof(zones_size)); - memset(zholes_size, 0, sizeof(zholes_size)); - - zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; - zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; - - free_area_init_node(0, NODE_DATA(0), zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); -} -#endif /* ! 
CONFIG_NEED_MULTIPLE_NODES */ - static struct kcore_list kcore_vmem; static int __init setup_kcore(void) @@ -264,61 +193,6 @@ static int __init setup_kcore(void) } module_init(setup_kcore); -void __init mem_init(void) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - int nid; -#endif - pg_data_t *pgdat; - unsigned long i; - struct page *page; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - - num_physpages = max_low_pfn; /* RAM is assumed contiguous */ - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - -#ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %x\n", nid); - totalram_pages += - free_all_bootmem_node(NODE_DATA(nid)); - } - } -#else - max_mapnr = num_physpages; - totalram_pages += free_all_bootmem(); -#endif - - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - - codesize = (unsigned long)&_etext - (unsigned long)&_stext; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - datasize = (unsigned long)&_edata - (unsigned long)&__init_end; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; - - printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " - "%luk reserved, %luk data, %luk bss, %luk init)\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - - mem_init_done = 1; - - /* Initialize the vDSO */ - vdso_init(); -} - void __iomem * reserve_phb_iospace(unsigned long size) { void __iomem *virt_addr; diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c new file mode 100644 index 00000000000..9b5aa6808eb --- /dev/null +++ b/arch/powerpc/mm/lmb.c @@ -0,0 +1,296 @@ +/* + * Procedures for maintaining information about logical memory blocks. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PPC32 +#include "mmu_decl.h" /* for __max_low_memory */ +#endif + +struct lmb lmb; + +#undef DEBUG + +void lmb_dump_all(void) +{ +#ifdef DEBUG + unsigned long i; + + udbg_printf("lmb_dump_all:\n"); + udbg_printf(" memory.cnt = 0x%lx\n", + lmb.memory.cnt); + udbg_printf(" memory.size = 0x%lx\n", + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { + udbg_printf(" memory.region[0x%x].base = 0x%lx\n", + i, lmb.memory.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.memory.region[i].size); + } + + udbg_printf("\n reserved.cnt = 0x%lx\n", + lmb.reserved.cnt); + udbg_printf(" reserved.size = 0x%lx\n", + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { + udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", + i, lmb.reserved.region[i].base); + udbg_printf(" .size = 0x%lx\n", + lmb.reserved.region[i].size); + } +#endif /* DEBUG */ +} + +static unsigned long __init lmb_addrs_overlap(unsigned long base1, + unsigned long size1, unsigned long base2, unsigned long size2) +{ + return ((base1 < (base2+size2)) && (base2 < (base1+size1))); +} + +static long __init lmb_addrs_adjacent(unsigned long base1, unsigned long size1, + unsigned long base2, unsigned long size2) +{ + if (base2 == base1 + size1) + return 1; + else if (base1 == base2 + size2) + return -1; + + return 0; +} + +static long __init lmb_regions_adjacent(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long base1 = rgn->region[r1].base; + unsigned long size1 = rgn->region[r1].size; + unsigned long base2 = rgn->region[r2].base; + unsigned long size2 = rgn->region[r2].size; + + return lmb_addrs_adjacent(base1, size1, base2, size2); +} + +/* Assumption: base addr of region 1 < base addr of region 2 */ +static void __init lmb_coalesce_regions(struct lmb_region *rgn, + unsigned long r1, unsigned long r2) +{ + unsigned long i; + + rgn->region[r1].size += rgn->region[r2].size; + for (i=r2; i < rgn->cnt-1; i++) { + rgn->region[i].base = rgn->region[i+1].base; + rgn->region[i].size = rgn->region[i+1].size; + } + rgn->cnt--; +} + +/* This routine called with relocation disabled. */ +void __init lmb_init(void) +{ + /* Create a dummy zero size LMB which will get coalesced away later. + * This simplifies the lmb_add() code below... + */ + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; + + /* Ditto. */ + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; +} + +/* This routine may be called with relocation disabled. */ +void __init lmb_analyze(void) +{ + int i; + + lmb.memory.size = 0; + + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; +} + +/* This routine called with relocation disabled. */ +static long __init lmb_add_region(struct lmb_region *rgn, unsigned long base, + unsigned long size) +{ + unsigned long i, coalesced = 0; + long adjacent; + + /* First try and coalesce this LMB with another. 
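+	 * lmb_addrs_adjacent() returns 1 when the new block ends
+	 * exactly where region i begins (so the region is grown
+	 * downwards) and -1 when it begins where region i ends (so
+	 * the region is grown upwards).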
*/ + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + + adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); + if ( adjacent > 0 ) { + rgn->region[i].base -= size; + rgn->region[i].size += size; + coalesced++; + break; + } + else if ( adjacent < 0 ) { + rgn->region[i].size += size; + coalesced++; + break; + } + } + + if ((i < rgn->cnt-1) && lmb_regions_adjacent(rgn, i, i+1) ) { + lmb_coalesce_regions(rgn, i, i+1); + coalesced++; + } + + if (coalesced) + return coalesced; + if (rgn->cnt >= MAX_LMB_REGIONS) + return -1; + + /* Couldn't coalesce the LMB, so add it to the sorted table. */ + for (i = rgn->cnt-1; i >= 0; i--) { + if (base < rgn->region[i].base) { + rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].size = rgn->region[i].size; + } else { + rgn->region[i+1].base = base; + rgn->region[i+1].size = size; + break; + } + } + rgn->cnt++; + + return 0; +} + +/* This routine may be called with relocation disabled. */ +long __init lmb_add(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.memory); + + /* On pSeries LPAR systems, the first LMB is our RMO region. */ + if (base == 0) + lmb.rmo_size = size; + + return lmb_add_region(_rgn, base, size); + +} + +long __init lmb_reserve(unsigned long base, unsigned long size) +{ + struct lmb_region *_rgn = &(lmb.reserved); + + return lmb_add_region(_rgn, base, size); +} + +long __init lmb_overlaps_region(struct lmb_region *rgn, unsigned long base, + unsigned long size) +{ + unsigned long i; + + for (i=0; i < rgn->cnt; i++) { + unsigned long rgnbase = rgn->region[i].base; + unsigned long rgnsize = rgn->region[i].size; + if ( lmb_addrs_overlap(base,size,rgnbase,rgnsize) ) { + break; + } + } + + return (i < rgn->cnt) ? i : -1; +} + +unsigned long __init lmb_alloc(unsigned long size, unsigned long align) +{ + return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); +} + +unsigned long __init lmb_alloc_base(unsigned long size, unsigned long align, + unsigned long max_addr) +{ + long i, j; + unsigned long base = 0; + +#ifdef CONFIG_PPC32 + /* On 32-bit, make sure we allocate lowmem */ + if (max_addr == LMB_ALLOC_ANYWHERE) + max_addr = __max_low_memory; +#endif + for (i = lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; + + if (max_addr == LMB_ALLOC_ANYWHERE) + base = _ALIGN_DOWN(lmbbase + lmbsize - size, align); + else if (lmbbase < max_addr) { + base = min(lmbbase + lmbsize, max_addr); + base = _ALIGN_DOWN(base - size, align); + } else + continue; + + while ((lmbbase <= base) && + ((j = lmb_overlaps_region(&lmb.reserved, base, size)) >= 0) ) + base = _ALIGN_DOWN(lmb.reserved.region[j].base - size, + align); + + if ((base != 0) && (lmbbase <= base)) + break; + } + + if (i < 0) + return 0; + + lmb_add_region(&lmb.reserved, base, size); + + return base; +} + +/* You must call lmb_analyze() before this. */ +unsigned long __init lmb_phys_mem_size(void) +{ + return lmb.memory.size; +} + +unsigned long __init lmb_end_of_DRAM(void) +{ + int idx = lmb.memory.cnt - 1; + + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); +} + +/* + * Truncate the lmb list to memory_limit if it's set + * You must call lmb_analyze() after this. + */ +void __init lmb_enforce_memory_limit(unsigned long memory_limit) +{ + unsigned long i, limit; + + if (! 
memory_limit) + return; + + limit = memory_limit; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; + continue; + } + + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; + break; + } +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 345db08e5d2..0650de74d0b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -45,8 +45,9 @@ #include #include #include +#include +#include -#include "mem_pieces.h" #include "mmu_decl.h" #ifndef CPU_FTR_COHERENT_ICACHE @@ -54,6 +55,9 @@ #define CPU_FTR_NOEXECUTE 0 #endif +int init_bootmem_done; +int mem_init_done; + /* * This is called by /dev/mem to know if a given address has to * be mapped non-cacheable or not @@ -130,6 +134,185 @@ void show_mem(void) printk("%ld pages swap cached\n", cached); } +/* + * Initialize the bootmem system and give it all the memory we + * have available. If we are using highmem, we only put the + * lowmem into the bootmem system. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +void __init do_init_bootmem(void) +{ + unsigned long i; + unsigned long start, bootmap_pages; + unsigned long total_pages; + int boot_mapsize; + + max_pfn = total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; +#ifdef CONFIG_HIGHMEM + total_pages = total_lowmem >> PAGE_SHIFT; +#endif + + /* + * Find an area to use for the bootmem bitmap. Calculate the size of + * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. + * Add 1 additional page in case the address isn't page-aligned. + */ + bootmap_pages = bootmem_bootmap_pages(total_pages); + + start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); + BUG_ON(!start); + + boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + + /* Add all physical memory to the bootmem map, mark each area + * present. + */ + for (i = 0; i < lmb.memory.cnt; i++) { + unsigned long base = lmb.memory.region[i].base; + unsigned long size = lmb_size_bytes(&lmb.memory, i); +#ifdef CONFIG_HIGHMEM + if (base >= total_lowmem) + continue; + if (base + size > total_lowmem) + size = total_lowmem - base; +#endif + free_bootmem(base, size); + } + + /* reserve the sections we're already using */ + for (i = 0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb.reserved.region[i].base, + lmb_size_bytes(&lmb.reserved, i)); + + /* XXX need to clip this if using highmem? */ + for (i = 0; i < lmb.memory.cnt; i++) + memory_present(0, lmb_start_pfn(&lmb.memory, i), + lmb_end_pfn(&lmb.memory, i)); + init_bootmem_done = 1; +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = lmb_end_of_DRAM(); + +#ifdef CONFIG_HIGHMEM + map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k + (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); + map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ + kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k + (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); + kmap_prot = PAGE_KERNEL; +#endif /* CONFIG_HIGHMEM */ + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + /* + * All pages are DMA-able so we put them all in the DMA zone. 
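+	 * (With CONFIG_HIGHMEM, only lowmem is placed in ZONE_DMA;
+	 * the pages above total_lowmem are accounted to ZONE_HIGHMEM
+	 * just below.)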
+ */ + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; + zholes_size[ZONE_HIGHMEM] = (top_of_ram - total_ram) >> PAGE_SHIFT; +#else + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + + free_area_init_node(0, NODE_DATA(0), zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); +} +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ + +void __init mem_init(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int nid; +#endif + pg_data_t *pgdat; + unsigned long i; + struct page *page; + unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; + + num_physpages = max_pfn; /* RAM is assumed contiguous */ + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages != 0) { + printk("freeing bootmem node %x\n", nid); + totalram_pages += + free_all_bootmem_node(NODE_DATA(nid)); + } + } +#else + max_mapnr = num_physpages; + totalram_pages += free_all_bootmem(); +#endif + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + if (PageReserved(page)) + reservedpages++; + } + } + + codesize = (unsigned long)&_sdata - (unsigned long)&_stext; + datasize = (unsigned long)&__init_begin - (unsigned long)&_sdata; + initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + +#ifdef CONFIG_HIGHMEM + { + unsigned long pfn, highmem_mapnr; + + highmem_mapnr = total_lowmem >> PAGE_SHIFT; + for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { + struct page *page = pfn_to_page(pfn); + + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; + printk(KERN_INFO "High memory: %luk\n", + totalhigh_pages << (PAGE_SHIFT-10)); + } +#endif /* CONFIG_HIGHMEM */ + + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " + "%luk reserved, %luk data, %luk bss, %luk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + bsssize >> 10, + initsize >> 10); + + mem_init_done = 1; + +#ifdef CONFIG_PPC64 + /* Initialize the vDSO */ + vdso_init(); +#endif +} + /* * This is called when a page has been modified by the kernel. * It just marks the page as not i-cache clean. We do the i-cache diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c deleted file mode 100644 index 3d639052017..00000000000 --- a/arch/powerpc/mm/mem_pieces.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 1996 Paul Mackerras - * Changes to accommodate Power Macintoshes. - * Cort Dougan - * Rewrites. - * Grant Erickson - * General rework and split from mm/init.c. - * - * Module name: mem_pieces.c - * - * Description: - * Routines and data structures for manipulating and representing - * phyiscal memory extents (i.e. address/length pairs). 
- * - */ - -#include -#include -#include -#include -#include - -#include "mem_pieces.h" - -extern struct mem_pieces phys_avail; - -static void mem_pieces_print(struct mem_pieces *); - -/* - * Scan a region for a piece of a given size with the required alignment. - */ -void __init * -mem_pieces_find(unsigned int size, unsigned int align) -{ - int i; - unsigned a, e; - struct mem_pieces *mp = &phys_avail; - - for (i = 0; i < mp->n_regions; ++i) { - a = mp->regions[i].address; - e = a + mp->regions[i].size; - a = (a + align - 1) & -align; - if (a + size <= e) { - mem_pieces_remove(mp, a, size, 1); - return (void *) __va(a); - } - } - panic("Couldn't find %u bytes at %u alignment\n", size, align); - - return NULL; -} - -/* - * Remove some memory from an array of pieces - */ -void __init -mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size, - int must_exist) -{ - int i, j; - unsigned int end, rs, re; - struct reg_property *rp; - - end = start + size; - for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) { - if (end > rp->address && start < rp->address + rp->size) - break; - } - if (i >= mp->n_regions) { - if (must_exist) - printk("mem_pieces_remove: [%x,%x) not in any region\n", - start, end); - return; - } - for (; i < mp->n_regions && end > rp->address; ++i, ++rp) { - rs = rp->address; - re = rs + rp->size; - if (must_exist && (start < rs || end > re)) { - printk("mem_pieces_remove: bad overlap [%x,%x) with", - start, end); - mem_pieces_print(mp); - must_exist = 0; - } - if (start > rs) { - rp->size = start - rs; - if (end < re) { - /* need to split this entry */ - if (mp->n_regions >= MEM_PIECES_MAX) - panic("eek... mem_pieces overflow"); - for (j = mp->n_regions; j > i + 1; --j) - mp->regions[j] = mp->regions[j-1]; - ++mp->n_regions; - rp[1].address = end; - rp[1].size = re - end; - } - } else { - if (end < re) { - rp->address = end; - rp->size = re - end; - } else { - /* need to delete this entry */ - for (j = i; j < mp->n_regions - 1; ++j) - mp->regions[j] = mp->regions[j+1]; - --mp->n_regions; - --i; - --rp; - } - } - } -} - -static void __init -mem_pieces_print(struct mem_pieces *mp) -{ - int i; - - for (i = 0; i < mp->n_regions; ++i) - printk(" [%x, %x)", mp->regions[i].address, - mp->regions[i].address + mp->regions[i].size); - printk("\n"); -} - -void __init -mem_pieces_sort(struct mem_pieces *mp) -{ - unsigned long a, s; - int i, j; - - for (i = 1; i < mp->n_regions; ++i) { - a = mp->regions[i].address; - s = mp->regions[i].size; - for (j = i - 1; j >= 0; --j) { - if (a >= mp->regions[j].address) - break; - mp->regions[j+1] = mp->regions[j]; - } - mp->regions[j+1].address = a; - mp->regions[j+1].size = s; - } -} - -void __init -mem_pieces_coalesce(struct mem_pieces *mp) -{ - unsigned long a, s, ns; - int i, j, d; - - d = 0; - for (i = 0; i < mp->n_regions; i = j) { - a = mp->regions[i].address; - s = mp->regions[i].size; - for (j = i + 1; j < mp->n_regions - && mp->regions[j].address - a <= s; ++j) { - ns = mp->regions[j].address + mp->regions[j].size - a; - if (ns > s) - s = ns; - } - mp->regions[d].address = a; - mp->regions[d].size = s; - ++d; - } - mp->n_regions = d; -} diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h deleted file mode 100644 index e2b700dc7f1..00000000000 --- a/arch/powerpc/mm/mem_pieces.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 1996 Paul Mackerras - * Changes to accommodate Power Macintoshes. - * Cort Dougan - * Rewrites. - * Grant Erickson - * General rework and split from mm/init.c. 
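Taken together, the routines above form a tiny first-fit allocator over address/length pairs. A usage sketch, with a made-up size: carving a 64kB, 64kB-aligned piece out of phys_avail is just

	void *p = mem_pieces_find(0x10000, 0x10000);	/* kernel-virtual pointer; panics if nothing fits */

mem_pieces_find() rounds each free region's start up to the requested alignment and, on the first fit, calls mem_pieces_remove() to take that range out of the list.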
- *
- * Module name: mem_pieces.h
- *
- * Description:
- *	Routines and data structures for manipulating and representing
- *	physical memory extents (i.e. address/length pairs).
- *
- */
-
-#ifndef __MEM_PIECES_H__
-#define __MEM_PIECES_H__
-
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* Type Definitions */
-
-#define MEM_PIECES_MAX	32
-
-struct mem_pieces {
-	int n_regions;
-	struct reg_property regions[MEM_PIECES_MAX];
-};
-
-/* Function Prototypes */
-
-extern void *mem_pieces_find(unsigned int size, unsigned int align);
-extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start,
-			      unsigned int size, int must_exist);
-extern void mem_pieces_coalesce(struct mem_pieces *mp);
-extern void mem_pieces_sort(struct mem_pieces *mp);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* __MEM_PIECES_H__ */
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 540f3292b22..06fe8af3af5 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -36,6 +36,8 @@ extern unsigned long ioremap_base;
 extern unsigned long ioremap_bot;
 extern unsigned int rtas_data, rtas_size;
+extern unsigned long __max_low_memory;
+extern unsigned long __initial_memory_limit;
 extern unsigned long total_memory;
 extern unsigned long total_lowmem;
 extern int mem_init_done;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 81a3d7446d3..5792e533916 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -190,8 +190,7 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
	 * Don't allow anybody to remap normal RAM that we're using.
	 * mem_init() sets high_memory so only do the check after that.
	 */
-	if ( mem_init_done && (p < virt_to_phys(high_memory)) )
-	{
+	if (mem_init_done && (p < virt_to_phys(high_memory))) {
		printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n",
		       p, __builtin_return_address(0));
		return NULL;
diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c
index 9a381ed5eb2..cef9e83cc7e 100644
--- a/arch/powerpc/mm/ppc_mmu.c
+++ b/arch/powerpc/mm/ppc_mmu.c
@@ -32,9 +32,9 @@
 #include
 #include
 #include
+#include
 #include "mmu_decl.h"
-#include "mem_pieces.h"
 PTE *Hash, *Hash_end;
 unsigned long Hash_size, Hash_mask;
@@ -215,17 +215,6 @@ void __init MMU_init_hw(void)
 #define MIN_N_HPTEG	1024		/* min 64kB hash table */
 #endif
-#ifdef CONFIG_POWER4
-	/* The hash table has already been allocated and initialized
-	   in prom.c */
-	n_hpteg = Hash_size >> LG_HPTEG_SIZE;
-	lg_n_hpteg = __ilog2(n_hpteg);
-
-	/* Remove the hash table from the available memory */
-	if (Hash)
-		reserve_phys_mem(__pa(Hash), Hash_size);
-
-#else /* CONFIG_POWER4 */
	/*
	 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
	 * This is less than the recommended amount, but then
@@ -245,10 +234,10 @@ void __init MMU_init_hw(void)
	 * Find some memory for the hash table.
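(The sizing rule described a few lines up — one HPTE per page of memory, eight 8-byte HPTEs per 64-byte HPTEG, clamped to MIN_N_HPTEG and rounded up to a power of two — works out to roughly the following, a sketch using this file's own names rather than the exact lines the hunk context elides:

	n_hpteg = total_memory / (PAGE_SIZE * 8);
	if (n_hpteg < MIN_N_HPTEG)
		n_hpteg = MIN_N_HPTEG;
	lg_n_hpteg = __ilog2(n_hpteg);
	if (n_hpteg & (n_hpteg - 1)) {
		++lg_n_hpteg;		/* round up if not a power of 2 */
		n_hpteg = 1 << lg_n_hpteg;
	}
	Hash_size = n_hpteg << LG_HPTEG_SIZE;

)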
*/ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = mem_pieces_find(Hash_size, Hash_size); + Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + __initial_memory_limit)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; -#endif /* CONFIG_POWER4 */ Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); diff --git a/include/asm-ppc/page.h b/include/asm-ppc/page.h index 4789dc02424..fc44f7ca62d 100644 --- a/include/asm-ppc/page.h +++ b/include/asm-ppc/page.h @@ -34,6 +34,17 @@ typedef unsigned long pte_basic_t; #define PTE_FMT "%.8lx" #endif +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + + #undef STRICT_MM_TYPECHECKS #ifdef STRICT_MM_TYPECHECKS @@ -76,13 +87,6 @@ typedef unsigned long pgprot_t; #endif - -/* align addr on a size boundary - adjust address up if needed -- Cort */ -#define _ALIGN(addr,size) (((addr)+(size)-1)&(~((size)-1))) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - struct page; extern void clear_pages(void *page, int order); static inline void clear_page(void *page) { clear_pages(page, 0); } -- cgit v1.2.3-70-g09d2 From 70d64ceaa1a84d2502405422a4dfd3f87786a347 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 10 Oct 2005 21:52:43 +1000 Subject: powerpc: Rename files to have consistent _32/_64 suffixes This doesn't change any code, just renames things so we consistently have foo_32.c and foo_64.c where we have separate 32- and 64-bit versions. 
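In the kbuild fragments that follow, the payoff is that every split source file is now selected by a uniform pair of rules of the form

	obj-$(CONFIG_PPC32) += foo_32.o
	obj-$(CONFIG_PPC64) += foo_64.o

(with foo standing in for any of the renamed files), rather than a mixture of bare names and ad-hoc 32/64 suffixes.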
Signed-off-by: Paul Mackerras --- arch/powerpc/Makefile | 2 +- arch/powerpc/kernel/Makefile | 4 +- arch/powerpc/kernel/head.S | 1399 -------------------------------------- arch/powerpc/kernel/head_32.S | 1399 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup.c | 678 ------------------ arch/powerpc/kernel/setup_32.c | 678 ++++++++++++++++++ arch/powerpc/lib/Makefile | 6 +- arch/powerpc/lib/checksum.S | 225 ------ arch/powerpc/lib/checksum64.S | 229 ------- arch/powerpc/lib/checksum_32.S | 225 ++++++ arch/powerpc/lib/checksum_64.S | 229 +++++++ arch/powerpc/lib/copy32.S | 543 --------------- arch/powerpc/lib/copy_32.S | 543 +++++++++++++++ arch/powerpc/lib/copypage.S | 121 ---- arch/powerpc/lib/copypage_64.S | 121 ++++ arch/powerpc/lib/copyuser.S | 576 ---------------- arch/powerpc/lib/copyuser_64.S | 576 ++++++++++++++++ arch/powerpc/lib/mem_64.S | 119 ++++ arch/powerpc/lib/memcpy.S | 172 ----- arch/powerpc/lib/memcpy_64.S | 172 +++++ arch/powerpc/lib/usercopy.c | 41 -- arch/powerpc/lib/usercopy_64.c | 41 ++ arch/powerpc/mm/Makefile | 8 +- arch/powerpc/mm/hash_32.S | 618 ----------------- arch/powerpc/mm/hash_low_32.S | 618 +++++++++++++++++ arch/powerpc/mm/init.c | 258 ------- arch/powerpc/mm/init64.c | 259 ------- arch/powerpc/mm/init_32.c | 258 +++++++ arch/powerpc/mm/init_64.c | 259 +++++++ arch/powerpc/mm/mem64.c | 259 ------- arch/powerpc/mm/mmu_context.c | 86 --- arch/powerpc/mm/mmu_context64.c | 63 -- arch/powerpc/mm/mmu_context_32.c | 86 +++ arch/powerpc/mm/mmu_context_64.c | 63 ++ arch/powerpc/mm/pgtable.c | 469 ------------- arch/powerpc/mm/pgtable64.c | 357 ---------- arch/powerpc/mm/pgtable_32.c | 469 +++++++++++++ arch/powerpc/mm/pgtable_64.c | 357 ++++++++++ arch/powerpc/mm/ppc_mmu.c | 285 -------- arch/powerpc/mm/ppc_mmu_32.c | 285 ++++++++ arch/powerpc/mm/tlb.c | 183 ----- arch/powerpc/mm/tlb_32.c | 183 +++++ 42 files changed, 6691 insertions(+), 6831 deletions(-) delete mode 100644 arch/powerpc/kernel/head.S create mode 100644 arch/powerpc/kernel/head_32.S delete mode 100644 arch/powerpc/kernel/setup.c create mode 100644 arch/powerpc/kernel/setup_32.c delete mode 100644 arch/powerpc/lib/checksum.S delete mode 100644 arch/powerpc/lib/checksum64.S create mode 100644 arch/powerpc/lib/checksum_32.S create mode 100644 arch/powerpc/lib/checksum_64.S delete mode 100644 arch/powerpc/lib/copy32.S create mode 100644 arch/powerpc/lib/copy_32.S delete mode 100644 arch/powerpc/lib/copypage.S create mode 100644 arch/powerpc/lib/copypage_64.S delete mode 100644 arch/powerpc/lib/copyuser.S create mode 100644 arch/powerpc/lib/copyuser_64.S create mode 100644 arch/powerpc/lib/mem_64.S delete mode 100644 arch/powerpc/lib/memcpy.S create mode 100644 arch/powerpc/lib/memcpy_64.S delete mode 100644 arch/powerpc/lib/usercopy.c create mode 100644 arch/powerpc/lib/usercopy_64.c delete mode 100644 arch/powerpc/mm/hash_32.S create mode 100644 arch/powerpc/mm/hash_low_32.S delete mode 100644 arch/powerpc/mm/init.c delete mode 100644 arch/powerpc/mm/init64.c create mode 100644 arch/powerpc/mm/init_32.c create mode 100644 arch/powerpc/mm/init_64.c delete mode 100644 arch/powerpc/mm/mem64.c delete mode 100644 arch/powerpc/mm/mmu_context.c delete mode 100644 arch/powerpc/mm/mmu_context64.c create mode 100644 arch/powerpc/mm/mmu_context_32.c create mode 100644 arch/powerpc/mm/mmu_context_64.c delete mode 100644 arch/powerpc/mm/pgtable.c delete mode 100644 arch/powerpc/mm/pgtable64.c create mode 100644 arch/powerpc/mm/pgtable_32.c create mode 100644 arch/powerpc/mm/pgtable_64.c delete mode 
100644 arch/powerpc/mm/ppc_mmu.c create mode 100644 arch/powerpc/mm/ppc_mmu_32.c delete mode 100644 arch/powerpc/mm/tlb.c create mode 100644 arch/powerpc/mm/tlb_32.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index eb1224c24e3..06486406575 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -112,7 +112,7 @@ CFLAGS += $(cpu-as-y) # Default to the common case. KBUILD_DEFCONFIG := common_defconfig -head-y := arch/powerpc/kernel/head.o +head-y := arch/powerpc/kernel/head_32.o head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_4xx) := arch/powerpc/kernel/head_4xx.o diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 06d618e52f8..344cab678c6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -10,7 +10,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif -extra-$(CONFIG_PPC_STD_MMU) := head.o +extra-$(CONFIG_PPC_STD_MMU) := head_32.o extra-$(CONFIG_PPC64) := head_64.o extra-$(CONFIG_40x) := head_4xx.o extra-$(CONFIG_44x) := head_44x.o @@ -21,7 +21,7 @@ extra-$(CONFIG_PPC_FPU) += fpu.o extra-y += vmlinux.lds obj-y := traps.o prom.o semaphore.o -obj-$(CONFIG_PPC32) += setup.o process.o +obj-$(CONFIG_PPC32) += setup_32.o process.o obj-$(CONFIG_PPC64) += idle_power4.o ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_PPC_OF) += prom_init.o of_device.o diff --git a/arch/powerpc/kernel/head.S b/arch/powerpc/kernel/head.S deleted file mode 100644 index d9dbbd42674..00000000000 --- a/arch/powerpc/kernel/head.S +++ /dev/null @@ -1,1399 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * This file contains the low-level support and setup for the - * PowerPC platform, including trap and interrupt dispatch. - * (The PPC 8xx embedded CPUs use head_8xx.S instead.) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_APUS -#include -#endif - -/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ -#define LOAD_BAT(n, reg, RA, RB) \ - /* see the comment for clear_bats() -- Cort */ \ - li RA,0; \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##U,RA; \ - lwz RA,(n*16)+0(reg); \ - lwz RB,(n*16)+4(reg); \ - mtspr SPRN_IBAT##n##U,RA; \ - mtspr SPRN_IBAT##n##L,RB; \ - beq 1f; \ - lwz RA,(n*16)+8(reg); \ - lwz RB,(n*16)+12(reg); \ - mtspr SPRN_DBAT##n##U,RA; \ - mtspr SPRN_DBAT##n##L,RB; \ -1: - - .text - .stabs "arch/ppc/kernel/",N_SO,0,0,0f - .stabs "head.S",N_SO,0,0,0f -0: - .globl _stext -_stext: - -/* - * _start is defined this way because the XCOFF loader in the OpenFirmware - * on the powermac expects the entry point to be a procedure descriptor. 
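(For readers unfamiliar with XCOFF: a procedure descriptor is a small data triple rather than raw code — roughly the following, with the struct and field names assumed purely for illustration:

	struct xcoff_proc_desc {
		unsigned long entry;	/* address of the actual code */
		unsigned long toc;	/* TOC anchor for the callee */
		unsigned long env;	/* environment pointer */
	};

hence the legacy note a few lines down about the entry point once having had to look like a COFF function entry.)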
- */ - .text - .globl _start -_start: - /* - * These are here for legacy reasons, the kernel used to - * need to look like a coff function entry for the pmac - * but we're always started by some kind of bootloader now. - * -- Cort - */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop /* used by __secondary_hold on prep (mtx) and chrp smp */ - nop - -/* PMAC - * Enter here with the kernel text, data and bss loaded starting at - * 0, running with virtual == physical mapping. - * r5 points to the prom entry point (the client interface handler - * address). Address translation is turned on, with the prom - * managing the hash table. Interrupts are disabled. The stack - * pointer (r1) points to just below the end of the half-meg region - * from 0x380000 - 0x400000, which is mapped in already. - * - * If we are booted from MacOS via BootX, we enter with the kernel - * image loaded somewhere, and the following values in registers: - * r3: 'BooX' (0x426f6f58) - * r4: virtual address of boot_infos_t - * r5: 0 - * - * APUS - * r3: 'APUS' - * r4: physical address of memory base - * Linux/m68k style BootInfo structure at &_end. - * - * PREP - * This is jumped to on prep systems right after the kernel is relocated - * to its proper place in memory by the boot loader. The expected layout - * of the regs is: - * r3: ptr to residual data - * r4: initrd_start or if no initrd then 0 - * r5: initrd_end - unused if r4 is 0 - * r6: Start of command line string - * r7: End of command line string - * - * This just gets a minimal mmu environment setup so we can call - * start_here() to do the real work. - * -- Cort - */ - - .globl __start -__start: -/* - * We have to do any OF calls before we map ourselves to KERNELBASE, - * because OF may have I/O devices mapped into that area - * (particularly on CHRP). - */ - cmpwi 0,r5,0 - beq 1f - bl prom_init - trap - -1: mr r31,r3 /* save parameters */ - mr r30,r4 - li r24,0 /* cpu # */ - -/* - * early_init() does the early machine identification and does - * the necessary low-level setup and clears the BSS - * -- Cort - */ - bl early_init - -#ifdef CONFIG_APUS -/* On APUS the __va/__pa constants need to be set to the correct - * values before continuing. - */ - mr r4,r30 - bl fix_mem_constants -#endif /* CONFIG_APUS */ - -/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains - * the physical address we are running at, returned by early_init() - */ - bl mmu_off -__after_mmu_off: - bl clear_bats - bl flush_tlbs - - bl initial_bats -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) - bl setup_disp_bat -#endif - -/* - * Call setup_cpu for CPU 0 and initialize 6xx Idle - */ - bl reloc_offset - li r24,0 /* cpu# */ - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_6xx - bl reloc_offset - bl init_idle_6xx -#endif /* CONFIG_6xx */ - - -#ifndef CONFIG_APUS -/* - * We need to run with _start at physical address 0. - * On CHRP, we are loaded at 0x10000 since OF on CHRP uses - * the exception vectors at 0 (and therefore this copy - * overwrites OF's exception vectors with our own). - * The MMU is off at this point. - */ - bl reloc_offset - mr r26,r3 - addis r4,r3,KERNELBASE@h /* current address of _start */ - cmpwi 0,r4,0 /* are we already running at 0? */ - bne relocate_kernel -#endif /* CONFIG_APUS */ -/* - * we now have the 1st 16M of ram mapped with the bats. - * prep needs the mmu to be turned on here, but pmac already has it on. 
- * this shouldn't bother the pmac since it just gets turned on again - * as we jump to our code at KERNELBASE. -- Cort - * Actually no, pmac doesn't have it on any more. BootX enters with MMU - * off, and in other cases, we now turn it off before changing BATs above. - */ -turn_on_mmu: - mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - SYNC - RFI /* enables MMU */ - -/* - * We need __secondary_hold as a place to hold the other cpus on - * an SMP machine, even when we are running a UP kernel. - */ - . = 0xc0 /* for prep bootloader */ - li r3,1 /* MTX only has 1 cpu */ - .globl __secondary_hold -__secondary_hold: - /* tell the master we're here */ - stw r3,4(0) -#ifdef CONFIG_SMP -100: lwz r4,0(0) - /* wait until we're told to start */ - cmpw 0,r4,r3 - bne 100b - /* our cpu # was at addr 0 - go */ - mr r24,r3 /* cpu # */ - b __secondary_start -#else - b . -#endif /* CONFIG_SMP */ - -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG0,r10; \ - mtspr SPRN_SPRG1,r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG3; \ - lwz r11,THREAD_INFO-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - CLR_TOP32(r11); \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . 
= n;	\
-label:						\
-	EXCEPTION_PROLOG;			\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
-	xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)	\
-	li	r10,trap;					\
-	stw	r10,TRAP(r11);					\
-	li	r10,MSR_KERNEL;					\
-	copyee(r10, r9);					\
-	bl	tfer;						\
-i##n:								\
-	.long	hdlr;						\
-	.long	ret
-
-#define COPY_EE(d, s)		rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr)		\
-	EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full,	\
-			  ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr)		\
-	EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
-			  ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr)		\
-	EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
-			  ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr)	\
-	EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
-			  ret_from_except)
-
-/* System reset */
-/* core99 pmac starts the secondary here by changing the vector, and
-   putting it back to what it was (unknown_exception) when done. */
-#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP)
-	. = 0x100
-	b	__secondary_start_gemini
-#else
-	EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
-#endif
-
-/* Machine check */
-/*
- * On CHRP, this is complicated by the fact that we could get a
- * machine check inside RTAS, and we have no guarantee that certain
- * critical registers will have the values we expect. The set of
- * registers that might have bad values includes all the GPRs
- * and all the BATs. We indicate that we are in RTAS by putting
- * a non-zero value, the address of the exception frame to use,
- * in SPRG2. The machine check handler checks SPRG2 and uses its
- * value if it is non-zero. If we ever needed to free up SPRG2,
- * we could use a field in the thread_info or thread_struct instead.
- * (Other exception handlers assume that r1 is a valid kernel stack
- * pointer when we take an exception from supervisor mode.)
- * -- paulus.
- */
-	. = 0x200
-	mtspr	SPRN_SPRG0,r10
-	mtspr	SPRN_SPRG1,r11
-	mfcr	r10
-#ifdef CONFIG_PPC_CHRP
-	mfspr	r11,SPRN_SPRG2
-	cmpwi	0,r11,0
-	bne	7f
-#endif /* CONFIG_PPC_CHRP */
-	EXCEPTION_PROLOG_1
-7:	EXCEPTION_PROLOG_2
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_CHRP
-	mfspr	r4,SPRN_SPRG2
-	cmpwi	cr1,r4,0
-	bne	cr1,1f
-#endif
-	EXC_XFER_STD(0x200, machine_check_exception)
-#ifdef CONFIG_PPC_CHRP
-1:	b	machine_check_in_rtas
-#endif
-
-/* Data access exception. */
-	. = 0x300
-DataAccess:
-	EXCEPTION_PROLOG
-	mfspr	r10,SPRN_DSISR
-	andis.	r0,r10,0xa470		/* weird error? */
-	bne	1f			/* if not, try to put a PTE */
-	mfspr	r4,SPRN_DAR		/* into the hash table */
-	rlwinm	r3,r10,32-15,21,21	/* DSISR_STORE -> _PAGE_RW */
-	bl	hash_page
-1:	stw	r10,_DSISR(r11)
-	mr	r5,r10
-	mfspr	r4,SPRN_DAR
-	EXC_XFER_EE_LITE(0x300, handle_page_fault)
-
-
-/* Instruction access exception. */
-	. = 0x400
-InstructionAccess:
-	EXCEPTION_PROLOG
-	andis.	r0,r9,0x4000		/* no pte found? */
-	beq	1f			/* if so, try to put a PTE */
-	li	r3,0			/* into the hash table */
-	mr	r4,r12			/* SRR0 is fault address */
-	bl	hash_page
-1:	mr	r4,r12
-	mr	r5,r9
-	EXC_XFER_EE_LITE(0x400, handle_page_fault)
-
-/* External interrupt */
-	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
-
-/* Alignment exception */
-	.
= 0x600 -Alignment: - EXCEPTION_PROLOG - mfspr r4,SPRN_DAR - stw r4,_DAR(r11) - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) - -/* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) - -/* Floating-point unavailable */ - . = 0x800 -FPUnavailable: - EXCEPTION_PROLOG - bne load_up_fpu /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) - -/* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) - -/* System call */ - . = 0xc00 -SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) - -/* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) - -/* - * The Altivec unavailable trap is at 0x0f20. Foo. - * We effectively remap it to 0x3000. - * We include an altivec unavailable exception vector even if - * not configured for Altivec, so that you can't panic a - * non-altivec kernel running on a machine with altivec just - * by executing an altivec instruction. - */ - . = 0xf00 - b Trap_0f - - . = 0xf20 - b AltiVecUnavailable - -Trap_0f: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0xf00, unknown_exception) - -/* - * Handle TLB miss for instruction on 603/603e. - * Note: we get an alternate set of r0 - r3 to use automatically. - */ - . = 0x1000 -InstructionTLBMiss: -/* - * r0: stored ctr - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - mfctr r0 - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS - lis r1,KERNELBASE@h /* check if kernel address */ - cmplw 0,r3,r1 - mfspr r2,SPRN_SPRG3 - li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ - lwz r2,PGDIR(r2) - blt+ 112f - lis r2,swapper_pg_dir@ha /* if kernel address, use */ - addi r2,r2,swapper_pg_dir@l /* kernel page table */ - mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ - rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r3,0(r2) /* get linux-style pte */ - andc. r1,r1,r3 /* check access & ~permission */ - bne- InstructionAddressInvalid /* return if access not permitted */ - ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - stw r3,0(r2) /* update PTE (accessed bit) */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r1,r1,r2 /* writable if _RW and _DIRTY */ - rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ - ori r1,r1,0xe14 /* clear out reserved bits and M */ - andc r1,r3,r1 /* PP = user? (rw&dirty? 
2: 3): 0 */ - mtspr SPRN_RPA,r1 - mfspr r3,SPRN_IMISS - tlbli r3 - mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r3 - rfi -InstructionAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ - mtctr r0 /* Restore CTR */ - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - or r2,r2,r1 - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_IMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ - xor r1,r1,r2 - mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b InstructionAccess - -/* - * Handle TLB miss for DATA Load operation on 603/603e - */ - . = 0x1100 -DataLoadTLBMiss: -/* - * r0: stored ctr - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - mfctr r0 - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1,KERNELBASE@h /* check if kernel address */ - cmplw 0,r3,r1 - mfspr r2,SPRN_SPRG3 - li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ - lwz r2,PGDIR(r2) - blt+ 112f - lis r2,swapper_pg_dir@ha /* if kernel address, use */ - addi r2,r2,swapper_pg_dir@l /* kernel page table */ - mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ - rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r3,0(r2) /* get linux-style pte */ - andc. r1,r1,r3 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - stw r3,0(r2) /* update PTE (accessed bit) */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r1,r1,r2 /* writable if _RW and _DIRTY */ - rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ - ori r1,r1,0xe14 /* clear out reserved bits and M */ - andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ - mtspr SPRN_RPA,r1 - mfspr r3,SPRN_DMISS - tlbld r3 - mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r3 - rfi -DataAddressInvalid: - mfspr r3,SPRN_SRR1 - rlwinm r1,r3,9,6,6 /* Get load/store bit */ - addis r1,r1,0x2000 - mtspr SPRN_DSISR,r1 - mtctr r0 /* Restore CTR */ - andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ - mtspr SPRN_SRR1,r2 - mfspr r1,SPRN_DMISS /* Get failing address */ - rlwinm. r2,r2,0,31,31 /* Check for little endian access */ - beq 20f /* Jump if big endian */ - xori r1,r1,3 -20: mtspr SPRN_DAR,r1 /* Set fault address */ - mfmsr r0 /* Restore "normal" registers */ - xoris r0,r0,MSR_TGPR>>16 - mtcrf 0x80,r3 /* Restore CR0 */ - mtmsr r0 - b DataAccess - -/* - * Handle TLB miss for DATA Store on 603/603e - */ - . 
= 0x1200 -DataStoreTLBMiss: -/* - * r0: stored ctr - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ - mfctr r0 - /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1,KERNELBASE@h /* check if kernel address */ - cmplw 0,r3,r1 - mfspr r2,SPRN_SPRG3 - li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ - lwz r2,PGDIR(r2) - blt+ 112f - lis r2,swapper_pg_dir@ha /* if kernel address, use */ - addi r2,r2,swapper_pg_dir@l /* kernel page table */ - mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ - rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ - rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r3,0(r2) /* get linux-style pte */ - andc. r1,r1,r3 /* check access & ~permission */ - bne- DataAddressInvalid /* return if access not permitted */ - ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ - stw r3,0(r2) /* update PTE (accessed/dirty bits) */ - /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ - li r1,0xe15 /* clear out reserved bits and M */ - andc r1,r3,r1 /* PP = user? 2: 0 */ - mtspr SPRN_RPA,r1 - mfspr r3,SPRN_DMISS - tlbld r3 - mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r3 - rfi - -#ifndef CONFIG_ALTIVEC -#define altivec_assist_exception unknown_exception -#endif - - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) - - .globl mol_trampoline - .set 
mol_trampoline, i0x2f00 - - . = 0x3000 - -AltiVecUnavailable: - EXCEPTION_PROLOG -#ifdef CONFIG_ALTIVEC - bne load_up_altivec /* if from user, just load it up */ -#endif /* CONFIG_ALTIVEC */ - EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) - -#ifdef CONFIG_ALTIVEC -/* Note that the AltiVec support is closely modeled after the FP - * support. Changes to one are likely to be applicable to the - * other! */ -load_up_altivec: -/* - * Disable AltiVec for the task which had AltiVec previously, - * and save its AltiVec registers in its thread_struct. - * Enables AltiVec for use in the kernel on return. - * On SMP we know the AltiVec units are free, since we give it up every - * switch. -- Kumar - */ - mfmsr r5 - oris r5,r5,MSR_VEC@h - MTMSRD(r5) /* enable use of AltiVec now */ - isync -/* - * For SMP, we don't do lazy AltiVec switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altivec in switch_to. - */ -#ifndef CONFIG_SMP - tophys(r6,0) - addis r3,r6,last_task_used_altivec@ha - lwz r4,last_task_used_altivec@l(r3) - cmpwi 0,r4,0 - beq 1f - add r4,r4,r6 - addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */ - SAVE_32VRS(0,r10,r4) - mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 - lwz r5,PT_REGS(r4) - add r5,r5,r6 - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r10,MSR_VEC@h - andc r4,r4,r10 /* disable altivec for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of AltiVec after return */ - oris r9,r9,MSR_VEC@h - mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ - li r4,1 - li r10,THREAD_VSCR - stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 - mtvscr vr0 - REST_32VRS(0,r10,r5) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - sub r4,r4,r6 - stw r4,last_task_used_altivec@l(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - /* we haven't used ctr or xer or lr */ - b fast_exception_return - -/* - * AltiVec unavailable trap from kernel - print a message, but let - * the task use AltiVec in the kernel until it returns to user mode. - */ -KernelAltiVec: - lwz r3,_MSR(r1) - oris r3,r3,MSR_VEC@h - stw r3,_MSR(r1) /* enable use of AltiVec after return */ - lis r3,87f@h - ori r3,r3,87f@l - mr r4,r2 /* current */ - lwz r5,_NIP(r1) - bl printk - b ret_from_except -87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" - .align 4,0 - -/* - * giveup_altivec(tsk) - * Disable AltiVec for the task given as the argument, - * and save the AltiVec registers in its thread_struct. - * Enables AltiVec for use in the kernel on return. - */ - - .globl giveup_altivec -giveup_altivec: - mfmsr r5 - oris r5,r5,MSR_VEC@h - SYNC - MTMSRD(r5) /* enable use of AltiVec now */ - isync - cmpwi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - lwz r5,PT_REGS(r3) - cmpwi 0,r5,0 - SAVE_32VRS(0, r4, r3) - mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 - beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VEC@h - andc r4,r4,r3 /* disable AltiVec for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - lis r4,last_task_used_altivec@ha - stw r5,last_task_used_altivec@l(r4) -#endif /* CONFIG_SMP */ - blr -#endif /* CONFIG_ALTIVEC */ - -/* - * This code is jumped to from the startup code to copy - * the kernel image to physical address 0. 
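In C terms, the relocation copy described above amounts to the sketch below; dcbst(), icbi(), sync() and isync() are hypothetical helpers standing in for the cache-management instructions the assembly issues directly:

	/* sketch: copy 'limit' bytes from 'src' to 'dst' and make the
	 * instruction cache coherent with the new contents */
	unsigned long off;

	for (off = 0; off < limit; off += L1_CACHE_LINE_SIZE) {
		memcpy(dst + off, src + off, L1_CACHE_LINE_SIZE);
		dcbst(dst + off);	/* write the new line back to memory */
		icbi(dst + off);	/* invalidate any stale icache copy */
	}
	sync();				/* order the stores... */
	isync();			/* ...and stop speculative fetch */

so that by the time we branch into the copied image, the icache cannot hand back stale bytes.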
- */ -relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h - li r3,0 /* Destination base address */ - li r6,0 /* Destination offset */ - li r5,0x4000 /* # bytes of memory to copy */ - bl copy_and_flush /* copy the first 0x4000 bytes */ - addi r0,r3,4f@l /* jump to the address of 4f */ - mtctr r0 /* in copy and do the rest. */ - bctr /* jump to the copy */ -4: mr r5,r25 - bl copy_and_flush /* copy the rest */ - b turn_on_mmu - -/* - * Copy routine used to copy the kernel to start at physical address 0 - * and flush and invalidate the caches as needed. - * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset - * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. - */ -copy_and_flush: - addi r5,r5,-4 - addi r6,r6,-4 -4: li r0,L1_CACHE_LINE_SIZE/4 - mtctr r0 -3: addi r6,r6,4 /* copy a cache line */ - lwzx r0,r6,r4 - stwx r0,r6,r3 - bdnz 3b - dcbst r6,r3 /* write it to memory */ - sync - icbi r6,r3 /* flush the icache line */ - cmplw 0,r6,r5 - blt 4b - sync /* additional sync needed on g4 */ - isync - addi r5,r5,4 - addi r6,r6,4 - blr - -#ifdef CONFIG_APUS -/* - * On APUS the physical base address of the kernel is not known at compile - * time, which means the __pa/__va constants used are incorrect. In the - * __init section is recorded the virtual addresses of instructions using - * these constants, so all that has to be done is fix these before - * continuing the kernel boot. - * - * r4 = The physical address of the kernel base. - */ -fix_mem_constants: - mr r10,r4 - addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ - neg r11,r10 /* phys_to_virt constant */ - - lis r12,__vtop_table_begin@h - ori r12,r12,__vtop_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__vtop_table_end@h - ori r13,r13,__vtop_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r10,16,16,31 /* half of vp const in low half */ - stw r15,0(r14) /* of instruction and restore. */ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - sync /* additional sync needed on g4 */ - isync - -/* - * Map the memory where the exception handlers will - * be copied to when hash constants have been patched. - */ -#ifdef CONFIG_APUS_FAST_EXCEPT - lis r8,0xfff0 -#else - lis r8,0 -#endif - ori r8,r8,0x2 /* 128KB, supervisor */ - mtspr SPRN_DBAT3U,r8 - mtspr SPRN_DBAT3L,r8 - - lis r12,__ptov_table_begin@h - ori r12,r12,__ptov_table_begin@l - add r12,r12,r10 /* table begin phys address */ - lis r13,__ptov_table_end@h - ori r13,r13,__ptov_table_end@l - add r13,r13,r10 /* table end phys address */ - subi r12,r12,4 - subi r13,r13,4 -1: lwzu r14,4(r12) /* virt address of instruction */ - add r14,r14,r10 /* phys address of instruction */ - lwz r15,0(r14) /* instruction, now insert top */ - rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ - stw r15,0(r14) /* of instruction and restore. 
*/ - dcbst r0,r14 /* write it to memory */ - sync - icbi r0,r14 /* flush the icache line */ - cmpw r12,r13 - bne 1b - - sync /* additional sync needed on g4 */ - isync /* No speculative loading until now */ - blr - -/*********************************************************************** - * Please note that on APUS the exception handlers are located at the - * physical address 0xfff0000. For this reason, the exception handlers - * cannot use relative branches to access the code below. - ***********************************************************************/ -#endif /* CONFIG_APUS */ - -#ifdef CONFIG_SMP -#ifdef CONFIG_GEMINI - .globl __secondary_start_gemini -__secondary_start_gemini: - mfspr r4,SPRN_HID0 - ori r4,r4,HID0_ICFI - li r3,0 - ori r3,r3,HID0_ICE - andc r4,r4,r3 - mtspr SPRN_HID0,r4 - sync - b __secondary_start -#endif /* CONFIG_GEMINI */ - - .globl __secondary_start_pmac_0 -__secondary_start_pmac_0: - /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ - li r24,0 - b 1f - li r24,1 - b 1f - li r24,2 - b 1f - li r24,3 -1: - /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 - set to map the 0xf0000000 - 0xffffffff region */ - mfmsr r0 - rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ - SYNC - mtmsr r0 - isync - - .globl __secondary_start -__secondary_start: - /* Copy some CPU settings from CPU 0 */ - bl __restore_cpu_setup - - lis r3,-KERNELBASE@h - mr r4,r24 - bl call_setup_cpu /* Call setup_cpu for this CPU */ -#ifdef CONFIG_6xx - lis r3,-KERNELBASE@h - bl init_idle_6xx -#endif /* CONFIG_6xx */ - - /* get current_thread_info and current */ - lis r1,secondary_ti@ha - tophys(r1,r1) - lwz r1,secondary_ti@l(r1) - tophys(r2,r1) - lwz r2,TI_TASK(r2) - - /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - li r0,0 - tophys(r3,r1) - stw r0,0(r3) - - /* load up the MMU */ - bl load_up_mmu - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* phys address of our thread_struct */ - CLR_TOP32(r4) - mtspr SPRN_SPRG3,r4 - li r3,0 - mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ - - /* enable MMU and jump to start_secondary */ - li r4,MSR_KERNEL - FIX_SRR1(r4,r5) - lis r3,start_secondary@h - ori r3,r3,start_secondary@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - SYNC - RFI -#endif /* CONFIG_SMP */ - -/* - * Those generic dummy functions are kept for CPUs not - * included in CONFIG_6xx - */ -#if !defined(CONFIG_6xx) -_GLOBAL(__save_cpu_setup) - blr -_GLOBAL(__restore_cpu_setup) - blr -#endif /* !defined(CONFIG_6xx) */ - - -/* - * Load stuff into the MMU. Intended to be called with - * IR=0 and DR=0. - */ -load_up_mmu: - sync /* Force all PTE updates to finish */ - isync - tlbia /* Clear all TLB entries */ - sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ - /* Load the SDR1 register (hash table base & size) */ - lis r6,_SDR1@ha - tophys(r6,r6) - lwz r6,_SDR1@l(r6) - mtspr SPRN_SDR1,r6 - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - -/* Load the BAT registers with the values set up by MMU_init. - MMU_init takes care of whether we're on a 601 or not. */ - mfpvr r3 - srwi r3,r3,16 - cmpwi r3,1 - lis r3,BATS@ha - addi r3,r3,BATS@l - tophys(r3,r3) - LOAD_BAT(0,r3,r4,r5) - LOAD_BAT(1,r3,r4,r5) - LOAD_BAT(2,r3,r4,r5) - LOAD_BAT(3,r3,r4,r5) - - blr - -/* - * This is where the main kernel code starts. 
- */
-start_here:
-	/* ptr to current */
-	lis	r2,init_task@h
-	ori	r2,r2,init_task@l
-	/* Set up for using our exception vectors */
-	/* ptr to phys current thread */
-	tophys(r4,r2)
-	addi	r4,r4,THREAD	/* init task's THREAD */
-	CLR_TOP32(r4)
-	mtspr	SPRN_SPRG3,r4
-	li	r3,0
-	mtspr	SPRN_SPRG2,r3	/* 0 => not in RTAS */
-
-	/* stack */
-	lis	r1,init_thread_union@ha
-	addi	r1,r1,init_thread_union@l
-	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
-/*
- * Do early platform-specific initialization,
- * and set up the MMU.
- */
-	mr	r3,r31
-	mr	r4,r30
-	bl	machine_init
-	bl	MMU_init
-
-#ifdef CONFIG_APUS
-	/* Copy exception code to exception vector base on APUS. */
-	lis	r4,KERNELBASE@h
-#ifdef CONFIG_APUS_FAST_EXCEPT
-	lis	r3,0xfff0		/* Copy to 0xfff00000 */
-#else
-	lis	r3,0			/* Copy to 0x00000000 */
-#endif
-	li	r5,0x4000		/* # bytes of memory to copy */
-	li	r6,0
-	bl	copy_and_flush		/* copy the first 0x4000 bytes */
-#endif /* CONFIG_APUS */
-
-/*
- * Go back to running unmapped so we can load up new values
- * for SDR1 (hash table pointer) and the segment registers
- * and change to using our exception vectors.
- */
-	lis	r4,2f@h
-	ori	r4,r4,2f@l
-	tophys(r4,r4)
-	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
-	FIX_SRR1(r3,r5)
-	mtspr	SPRN_SRR0,r4
-	mtspr	SPRN_SRR1,r3
-	SYNC
-	RFI
-/* Load up the kernel context */
-2:	bl	load_up_mmu
-
-#ifdef CONFIG_BDI_SWITCH
-	/* Add helper information for the Abatron bdiGDB debugger.
-	 * We do this here because we know the mmu is disabled, and
-	 * will be enabled for real in just a few instructions.
-	 */
-	lis	r5, abatron_pteptrs@h
-	ori	r5, r5, abatron_pteptrs@l
-	stw	r5, 0xf0(r0)	/* This must match your Abatron config */
-	lis	r6, swapper_pg_dir@h
-	ori	r6, r6, swapper_pg_dir@l
-	tophys(r5, r5)
-	stw	r6, 0(r5)
-#endif /* CONFIG_BDI_SWITCH */
-
-/* Now turn on the MMU for real! */
-	li	r4,MSR_KERNEL
-	FIX_SRR1(r4,r5)
-	lis	r3,start_kernel@h
-	ori	r3,r3,start_kernel@l
-	mtspr	SPRN_SRR0,r3
-	mtspr	SPRN_SRR1,r4
-	SYNC
-	RFI
-
-/*
- * Set up the segment registers for a new context.
- */
-_GLOBAL(set_context)
-	mulli	r3,r3,897	/* multiply context by skew factor */
-	rlwinm	r3,r3,4,8,27	/* VSID = (context & 0xfffff) << 4 */
-	addis	r3,r3,0x6000	/* Set Ks, Ku bits */
-	li	r0,NUM_USER_SEGMENTS
-	mtctr	r0
-
-#ifdef CONFIG_BDI_SWITCH
-	/* Context switch the PTE pointer for the Abatron BDI2000.
-	 * The PGDIR is passed as second argument.
-	 */
-	lis	r5, KERNELBASE@h
-	lwz	r5, 0xf0(r5)
-	stw	r4, 0x4(r5)
-#endif
-	li	r4,0
-	isync
-3:
-	mtsrin	r3,r4
-	addi	r3,r3,0x111	/* next VSID */
-	rlwinm	r3,r3,0,8,3	/* clear out any overflow from VSID field */
-	addis	r4,r4,0x1000	/* address of next segment */
-	bdnz	3b
-	sync
-	isync
-	blr
-
-/*
- * An undocumented "feature" of 604e requires that the v bit
- * be cleared before changing BAT values.
- *
- * Also, newer IBM firmware does not clear bat3 and 4 so
- * this makes sure it's done.
- * -- Cort - */ -clear_bats: - li r10,0 - mfspr r9,SPRN_PVR - rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ - cmpwi r9, 1 - beq 1f - - mtspr SPRN_DBAT0U,r10 - mtspr SPRN_DBAT0L,r10 - mtspr SPRN_DBAT1U,r10 - mtspr SPRN_DBAT1L,r10 - mtspr SPRN_DBAT2U,r10 - mtspr SPRN_DBAT2L,r10 - mtspr SPRN_DBAT3U,r10 - mtspr SPRN_DBAT3L,r10 -1: - mtspr SPRN_IBAT0U,r10 - mtspr SPRN_IBAT0L,r10 - mtspr SPRN_IBAT1U,r10 - mtspr SPRN_IBAT1L,r10 - mtspr SPRN_IBAT2U,r10 - mtspr SPRN_IBAT2L,r10 - mtspr SPRN_IBAT3U,r10 - mtspr SPRN_IBAT3L,r10 -BEGIN_FTR_SECTION - /* Here's a tweak: at this point, CPU setup have - * not been called yet, so HIGH_BAT_EN may not be - * set in HID0 for the 745x processors. However, it - * seems that doesn't affect our ability to actually - * write to these SPRs. - */ - mtspr SPRN_DBAT4U,r10 - mtspr SPRN_DBAT4L,r10 - mtspr SPRN_DBAT5U,r10 - mtspr SPRN_DBAT5L,r10 - mtspr SPRN_DBAT6U,r10 - mtspr SPRN_DBAT6L,r10 - mtspr SPRN_DBAT7U,r10 - mtspr SPRN_DBAT7L,r10 - mtspr SPRN_IBAT4U,r10 - mtspr SPRN_IBAT4L,r10 - mtspr SPRN_IBAT5U,r10 - mtspr SPRN_IBAT5L,r10 - mtspr SPRN_IBAT6U,r10 - mtspr SPRN_IBAT6L,r10 - mtspr SPRN_IBAT7U,r10 - mtspr SPRN_IBAT7L,r10 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) - blr - -flush_tlbs: - lis r10, 0x40 -1: addic. r10, r10, -0x1000 - tlbie r10 - blt 1b - sync - blr - -mmu_off: - addi r4, r3, __after_mmu_off - _start - mfmsr r3 - andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ - beqlr - andc r3,r3,r0 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - sync - RFI - -/* - * Use the first pair of BAT registers to map the 1st 16MB - * of RAM to KERNELBASE. From this point on we can't safely - * call OF any more. - */ -initial_bats: - lis r11,KERNELBASE@h - mfspr r9,SPRN_PVR - rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ - cmpwi 0,r9,1 - bne 4f - ori r11,r11,4 /* set up BAT registers for 601 */ - li r8,0x7f /* valid, block length = 8MB */ - oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */ - oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */ - mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ - mtspr SPRN_IBAT0L,r8 /* lower BAT register */ - mtspr SPRN_IBAT1U,r9 - mtspr SPRN_IBAT1L,r10 - isync - blr - -4: tophys(r8,r11) -#ifdef CONFIG_SMP - ori r8,r8,0x12 /* R/W access, M=1 */ -#else - ori r8,r8,2 /* R/W access */ -#endif /* CONFIG_SMP */ -#ifdef CONFIG_APUS - ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ -#else - ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ -#endif /* CONFIG_APUS */ - - mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ - mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ - mtspr SPRN_IBAT0L,r8 - mtspr SPRN_IBAT0U,r11 - isync - blr - -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) -setup_disp_bat: - /* - * setup the display bat prepared for us in prom.c - */ - mflr r8 - bl reloc_offset - mtlr r8 - addis r8,r3,disp_BAT@ha - addi r8,r8,disp_BAT@l - lwz r11,0(r8) - lwz r8,4(r8) - mfspr r9,SPRN_PVR - rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ - cmpwi 0,r9,1 - beq 1f - mtspr SPRN_DBAT3L,r8 - mtspr SPRN_DBAT3U,r11 - blr -1: mtspr SPRN_IBAT3L,r8 - mtspr SPRN_IBAT3U,r11 - blr - -#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ - - -#ifdef CONFIG_8260 -/* Jump into the system reset for the rom. - * We first disable the MMU, and then jump to the ROM reset address. - * - * r3 is the board info structure, r4 is the location for starting. 
- * I use this for building a small kernel that can load other kernels, - * rather than trying to write or rely on a rom monitor that can tftp load. - */ - .globl m8260_gorom -m8260_gorom: - mfmsr r0 - rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ - sync - mtmsr r0 - sync - mfspr r11, SPRN_HID0 - lis r10, 0 - ori r10,r10,HID0_ICE|HID0_DCE - andc r11, r11, r10 - mtspr SPRN_HID0, r11 - isync - li r5, MSR_ME|MSR_RI - lis r6,2f@h - addis r6,r6,-KERNELBASE@h - ori r6,r6,2f@l - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r5 - isync - sync - rfi -2: - mtlr r4 - blr -#endif - - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ - .data - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 - - .globl swapper_pg_dir -swapper_pg_dir: - .space 4096 - -/* - * This space gets a copy of optional info passed to us by the bootstrap - * Used to pass parameters into the kernel like root=/dev/sda1, etc. - */ - .globl cmd_line -cmd_line: - .space 512 - - .globl intercept_table -intercept_table: - .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 - .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 - .long 0, 0, 0, i0x1300, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - .long 0, 0, 0, 0, 0, 0, 0, 0 - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S new file mode 100644 index 00000000000..d9dbbd42674 --- /dev/null +++ b/arch/powerpc/kernel/head_32.S @@ -0,0 +1,1399 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This file contains the low-level support and setup for the + * PowerPC platform, including trap and interrupt dispatch. + * (The PPC 8xx embedded CPUs use head_8xx.S instead.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_APUS +#include +#endif + +/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + beq 1f; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ +1: + + .text + .stabs "arch/ppc/kernel/",N_SO,0,0,0f + .stabs "head.S",N_SO,0,0,0f +0: + .globl _stext +_stext: + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. 
+ */ + .text + .globl _start +_start: + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. + * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * APUS + * r3: 'APUS' + * r4: physical address of memory base + * Linux/m68k style BootInfo structure at &_end. + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + cmpwi 0,r5,0 + beq 1f + bl prom_init + trap + +1: mr r31,r3 /* save parameters */ + mr r30,r4 + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort + */ + bl early_init + +#ifdef CONFIG_APUS +/* On APUS the __va/__pa constants need to be set to the correct + * values before continuing. + */ + mr r4,r30 + bl fix_mem_constants +#endif /* CONFIG_APUS */ + +/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: + bl clear_bats + bl flush_tlbs + + bl initial_bats +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + +#ifndef CONFIG_APUS +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * The MMU is off at this point. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + cmpwi 0,r4,0 /* are we already running at 0? */ + bne relocate_kernel +#endif /* CONFIG_APUS */ +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. 
+ * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,4(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; \ + mtspr SPRN_SPRG1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG3; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . 
= n;							\
+label:								\
+	EXCEPTION_PROLOG;					\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,TRAP(r11);					\
+	li	r10,MSR_KERNEL;					\
+	copyee(r10, r9);					\
+	bl	tfer;						\
+i##n:								\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full,	\
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+/* System reset */
+/* core99 pmac starts the secondary here by changing the vector, and
+   putting it back to what it was (unknown_exception) when done. */
+#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP)
+	. = 0x100
+	b	__secondary_start_gemini
+#else
+	EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+#endif
+
+/* Machine check */
+/*
+ * On CHRP, this is complicated by the fact that we could get a
+ * machine check inside RTAS, and we have no guarantee that certain
+ * critical registers will have the values we expect. The set of
+ * registers that might have bad values includes all the GPRs
+ * and all the BATs. We indicate that we are in RTAS by putting
+ * a non-zero value, the address of the exception frame to use,
+ * in SPRG2. The machine check handler checks SPRG2 and uses its
+ * value if it is non-zero. If we ever needed to free up SPRG2,
+ * we could use a field in the thread_info or thread_struct instead.
+ * (Other exception handlers assume that r1 is a valid kernel stack
+ * pointer when we take an exception from supervisor mode.)
+ *	-- paulus.
+ */
+	. = 0x200
+	mtspr	SPRN_SPRG0,r10
+	mtspr	SPRN_SPRG1,r11
+	mfcr	r10
+#ifdef CONFIG_PPC_CHRP
+	mfspr	r11,SPRN_SPRG2
+	cmpwi	0,r11,0
+	bne	7f
+#endif /* CONFIG_PPC_CHRP */
+	EXCEPTION_PROLOG_1
+7:	EXCEPTION_PROLOG_2
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_CHRP
+	mfspr	r4,SPRN_SPRG2
+	cmpwi	cr1,r4,0
+	bne	cr1,1f
+#endif
+	EXC_XFER_STD(0x200, machine_check_exception)
+#ifdef CONFIG_PPC_CHRP
+1:	b	machine_check_in_rtas
+#endif
+
+/* Data access exception. */
+	. = 0x300
+DataAccess:
+	EXCEPTION_PROLOG
+	mfspr	r10,SPRN_DSISR
+	andis.	r0,r10,0xa470		/* weird error? */
+	bne	1f			/* if not, try to put a PTE */
+	mfspr	r4,SPRN_DAR		/* into the hash table */
+	rlwinm	r3,r10,32-15,21,21	/* DSISR_STORE -> _PAGE_RW */
+	bl	hash_page
+1:	stw	r10,_DSISR(r11)
+	mr	r5,r10
+	mfspr	r4,SPRN_DAR
+	EXC_XFER_EE_LITE(0x300, handle_page_fault)
+
+
+/* Instruction access exception. */
+	. = 0x400
+InstructionAccess:
+	EXCEPTION_PROLOG
+	andis.	r0,r9,0x4000		/* no pte found? */
+	beq	1f			/* if so, try to put a PTE */
+	li	r3,0			/* into the hash table */
+	mr	r4,r12			/* SRR0 is fault address */
+	bl	hash_page
+1:	mr	r4,r12
+	mr	r5,r9
+	EXC_XFER_EE_LITE(0x400, handle_page_fault)
+
+/* External interrupt */
+	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* Alignment exception */
+	.
= 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, alignment_exception) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 +FPUnavailable: + EXCEPTION_PROLOG + bne load_up_fpu /* if from user, just load it up */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + b Trap_0f + + . = 0xf20 + b AltiVecUnavailable + +Trap_0f: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0xf00, unknown_exception) + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 
2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_IMISS + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . 
= 0x1200 +DataStoreTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed/dirty bits) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + li r1,0xe15 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? 2: 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define altivec_assist_exception unknown_exception +#endif + + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE) + + .globl mol_trampoline + .set 
mol_trampoline, i0x2f00 + + . = 0x3000 + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + bne load_up_altivec /* if from user, just load it up */ +#endif /* CONFIG_ALTIVEC */ + EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + +#ifdef CONFIG_ALTIVEC +/* Note that the AltiVec support is closely modeled after the FP + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_altivec: +/* + * Disable AltiVec for the task which had AltiVec previously, + * and save its AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + * On SMP we know the AltiVec units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_VEC@h + MTMSRD(r5) /* enable use of AltiVec now */ + isync +/* + * For SMP, we don't do lazy AltiVec switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altivec in switch_to. + */ +#ifndef CONFIG_SMP + tophys(r6,0) + addis r3,r6,last_task_used_altivec@ha + lwz r4,last_task_used_altivec@l(r3) + cmpwi 0,r4,0 + beq 1f + add r4,r4,r6 + addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */ + SAVE_32VRS(0,r10,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + lwz r5,PT_REGS(r4) + add r5,r5,r6 + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_VEC@h + andc r4,r4,r10 /* disable altivec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of AltiVec after return */ + oris r9,r9,MSR_VEC@h + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r10,r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + sub r4,r4,r6 + stw r4,last_task_used_altivec@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * AltiVec unavailable trap from kernel - print a message, but let + * the task use AltiVec in the kernel until it returns to user mode. + */ +KernelAltiVec: + lwz r3,_MSR(r1) + oris r3,r3,MSR_VEC@h + stw r3,_MSR(r1) /* enable use of AltiVec after return */ + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" + .align 4,0 + +/* + * giveup_altivec(tsk) + * Disable AltiVec for the task given as the argument, + * and save the AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + */ + + .globl giveup_altivec +giveup_altivec: + mfmsr r5 + oris r5,r5,MSR_VEC@h + SYNC + MTMSRD(r5) /* enable use of AltiVec now */ + isync + cmpwi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpwi 0,r5,0 + SAVE_32VRS(0, r4, r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable AltiVec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_altivec@ha + stw r5,last_task_used_altivec@l(r4) +#endif /* CONFIG_SMP */ + blr +#endif /* CONFIG_ALTIVEC */ + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address 0. 
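+ *
+ * Roughly, in C (an illustrative sketch only; dcbst()/icbi() stand
+ * for the cache instructions of the same name):
+ *
+ *	void copy_and_flush(char *dst, char *src,
+ *			    unsigned long limit, unsigned long offset)
+ *	{
+ *		for (; offset < limit; offset += L1_CACHE_LINE_SIZE) {
+ *			memcpy(dst + offset, src + offset,
+ *			       L1_CACHE_LINE_SIZE);
+ *			dcbst(dst + offset);	push the line to memory
+ *			icbi(dst + offset);	drop stale icache copy
+ *		}
+ *	}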
+ */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + li r3,0 /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. */ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +copy_and_flush: + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_LINE_SIZE/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_APUS +/* + * On APUS the physical base address of the kernel is not known at compile + * time, which means the __pa/__va constants used are incorrect. In the + * __init section is recorded the virtual addresses of instructions using + * these constants, so all that has to be done is fix these before + * continuing the kernel boot. + * + * r4 = The physical address of the kernel base. + */ +fix_mem_constants: + mr r10,r4 + addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ + neg r11,r10 /* phys_to_virt constant */ + + lis r12,__vtop_table_begin@h + ori r12,r12,__vtop_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__vtop_table_end@h + ori r13,r13,__vtop_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r10,16,16,31 /* half of vp const in low half */ + stw r15,0(r14) /* of instruction and restore. */ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + sync /* additional sync needed on g4 */ + isync + +/* + * Map the memory where the exception handlers will + * be copied to when hash constants have been patched. + */ +#ifdef CONFIG_APUS_FAST_EXCEPT + lis r8,0xfff0 +#else + lis r8,0 +#endif + ori r8,r8,0x2 /* 128KB, supervisor */ + mtspr SPRN_DBAT3U,r8 + mtspr SPRN_DBAT3L,r8 + + lis r12,__ptov_table_begin@h + ori r12,r12,__ptov_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__ptov_table_end@h + ori r13,r13,__ptov_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ + stw r15,0(r14) /* of instruction and restore. 
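+ *	(in C terms, each patched instruction becomes roughly
+ *	 insn = (insn & 0xffff0000) | (pv_const >> 16);
+ *	 an illustrative sketch of the rlwimi above, not extra code)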
*/ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + + sync /* additional sync needed on g4 */ + isync /* No speculative loading until now */ + blr + +/*********************************************************************** + * Please note that on APUS the exception handlers are located at the + * physical address 0xfff0000. For this reason, the exception handlers + * cannot use relative branches to access the code below. + ***********************************************************************/ +#endif /* CONFIG_APUS */ + +#ifdef CONFIG_SMP +#ifdef CONFIG_GEMINI + .globl __secondary_start_gemini +__secondary_start_gemini: + mfspr r4,SPRN_HID0 + ori r4,r4,HID0_ICFI + li r3,0 + ori r3,r3,HID0_ICE + andc r4,r4,r3 + mtspr SPRN_HID0,r4 + sync + b __secondary_start +#endif /* CONFIG_GEMINI */ + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + SYNC + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_6xx */ + + /* get current_thread_info and current */ + lis r1,secondary_ti@ha + tophys(r1,r1) + lwz r1,secondary_ti@l(r1) + tophys(r2,r1) + lwz r2,TI_TASK(r2) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + CLR_TOP32(r4) + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI +#endif /* CONFIG_SMP */ + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_6xx + */ +#if !defined(CONFIG_6xx) +_GLOBAL(__save_cpu_setup) + blr +_GLOBAL(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_6xx) */ + + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + +/* Load the BAT registers with the values set up by MMU_init. + MMU_init takes care of whether we're on a 601 or not. */ + mfpvr r3 + srwi r3,r3,16 + cmpwi r3,1 + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) + + blr + +/* + * This is where the main kernel code starts. 
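+ * (For reference, the segment-register loop in load_up_mmu above is
+ * roughly this C, an illustrative sketch with mtsrin() standing for
+ * the instruction:
+ *
+ *	for (i = 0; i < 16; i++)
+ *		mtsrin(0x20000000 + i * 0x111, i << 28);
+ *
+ * i.e. Ku set and VSIDs 0, 0x111, 0x222, ... for kernel context 0.)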
+ */
+start_here:
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+	/* Set up for using our exception vectors */
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	CLR_TOP32(r4)
+	mtspr	SPRN_SPRG3,r4
+	li	r3,0
+	mtspr	SPRN_SPRG2,r3	/* 0 => not in RTAS */
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+/*
+ * Do early platform-specific initialization,
+ * and set up the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	bl	machine_init
+	bl	MMU_init
+
+#ifdef CONFIG_APUS
+	/* Copy exception code to exception vector base on APUS. */
+	lis	r4,KERNELBASE@h
+#ifdef CONFIG_APUS_FAST_EXCEPT
+	lis	r3,0xfff0		/* Copy to 0xfff00000 */
+#else
+	lis	r3,0			/* Copy to 0x00000000 */
+#endif
+	li	r5,0x4000		/* # bytes of memory to copy */
+	li	r6,0
+	bl	copy_and_flush		/* copy the first 0x4000 bytes */
+#endif /* CONFIG_APUS */
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+ * and change to using our exception vectors.
+ */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+	FIX_SRR1(r3,r5)
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	SYNC
+	RFI
+/* Load up the kernel context */
+2:	bl	load_up_mmu
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Add helper information for the Abatron bdiGDB debugger.
+	 * We do this here because we know the mmu is disabled, and
+	 * will be enabled for real in just a few instructions.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(r0)	/* This must match your Abatron config */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	tophys(r5, r5)
+	stw	r6, 0(r5)
+#endif /* CONFIG_BDI_SWITCH */
+
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	FIX_SRR1(r4,r5)
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	SYNC
+	RFI
+
+/*
+ * Set up the segment registers for a new context.
+ */
+_GLOBAL(set_context)
+	mulli	r3,r3,897	/* multiply context by skew factor */
+	rlwinm	r3,r3,4,8,27	/* VSID = (context & 0xfffff) << 4 */
+	addis	r3,r3,0x6000	/* Set Ks, Ku bits */
+	li	r0,NUM_USER_SEGMENTS
+	mtctr	r0
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is passed as second argument.
+	 */
+	lis	r5, KERNELBASE@h
+	lwz	r5, 0xf0(r5)
+	stw	r4, 0x4(r5)
+#endif
+	li	r4,0
+	isync
+3:
+	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* next VSID */
+	rlwinm	r3,r3,0,8,3	/* clear out any overflow from VSID field */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	sync
+	isync
+	blr
+
+/*
+ * An undocumented "feature" of 604e requires that the v bit
+ * be cleared before changing BAT values.
+ *
+ * Also, newer IBM firmware does not clear bat3 and 4 so
+ * this makes sure it's done.
+ * -- Cort + */ +clear_bats: + li r10,0 + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi r9, 1 + beq 1f + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 +1: + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + blr + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + blt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +/* + * Use the first pair of BAT registers to map the 1st 16MB + * of RAM to KERNELBASE. From this point on we can't safely + * call OF any more. + */ +initial_bats: + lis r11,KERNELBASE@h + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + bne 4f + ori r11,r11,4 /* set up BAT registers for 601 */ + li r8,0x7f /* valid, block length = 8MB */ + oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */ + oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */ + mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ + mtspr SPRN_IBAT0L,r8 /* lower BAT register */ + mtspr SPRN_IBAT1U,r9 + mtspr SPRN_IBAT1L,r10 + isync + blr + +4: tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ +#ifdef CONFIG_APUS + ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ +#else + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ +#endif /* CONFIG_APUS */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + lwz r11,0(r8) + lwz r8,4(r8) + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + beq 1f + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr + +#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ + + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. 
+ * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space 512 + + .globl intercept_table +intercept_table: + .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 + .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 + .long 0, 0, 0, i0x1300, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/setup.c b/arch/powerpc/kernel/setup.c deleted file mode 100644 index 27d7f828212..00000000000 --- a/arch/powerpc/kernel/setup.c +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Common prep/pmac/chrp boot and setup code. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define USES_PPC_SYS (defined(CONFIG_85xx) || defined(CONFIG_83xx) || \ - defined(CONFIG_MPC10X_BRIDGE) || defined(CONFIG_8260) || \ - defined(CONFIG_PPC_MPC52xx)) - -#if USES_PPC_SYS -#include -#endif - -#if defined CONFIG_KGDB -#include -#endif - -extern void platform_init(void); -extern void bootx_init(unsigned long r4, unsigned long phys); - -extern void ppc6xx_idle(void); -extern void power4_idle(void); - -boot_infos_t *boot_infos; -struct ide_machdep_calls ppc_ide_md; - -/* Used with the BI_MEMSIZE bootinfo parameter to store the memory - size value reported by the boot loader. */ -unsigned long boot_mem_size; - -unsigned long ISA_DMA_THRESHOLD; -unsigned int DMA_MODE_READ; -unsigned int DMA_MODE_WRITE; - -#ifdef CONFIG_PPC_MULTIPLATFORM -int _machine = 0; - -extern void prep_init(void); -extern void pmac_init(void); -extern void chrp_init(void); - -dev_t boot_dev; -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -#ifdef CONFIG_MAGIC_SYSRQ -unsigned long SYSRQ_KEY = 0x54; -#endif /* CONFIG_MAGIC_SYSRQ */ - -#ifdef CONFIG_VGA_CONSOLE -unsigned long vgacon_remap_base; -#endif - -struct machdep_calls ppc_md; - -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. 
- */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_FB_VGA16) || \ - defined(CONFIG_FB_VGA16_MODULE) || defined(CONFIG_FB_VESA) -struct screen_info screen_info = { - 0, 25, /* orig-x, orig-y */ - 0, /* unused */ - 0, /* orig-video-page */ - 0, /* orig-video-mode */ - 80, /* orig-video-cols */ - 0,0,0, /* ega_ax, ega_bx, ega_cx */ - 25, /* orig-video-lines */ - 1, /* orig-video-isVGA */ - 16 /* orig-video-points */ -}; -#endif /* CONFIG_VGA_CONSOLE || CONFIG_FB_VGA16 || CONFIG_FB_VESA */ - -void machine_restart(char *cmd) -{ -#ifdef CONFIG_NVRAM - nvram_sync(); -#endif - ppc_md.restart(cmd); -} - -void machine_power_off(void) -{ -#ifdef CONFIG_NVRAM - nvram_sync(); -#endif - ppc_md.power_off(); -} - -void machine_halt(void) -{ -#ifdef CONFIG_NVRAM - nvram_sync(); -#endif - ppc_md.halt(); -} - -void (*pm_power_off)(void) = machine_power_off; - -#ifdef CONFIG_TAU -extern u32 cpu_temp(unsigned long cpu); -extern u32 cpu_temp_both(unsigned long cpu); -#endif /* CONFIG_TAU */ - -int show_cpuinfo(struct seq_file *m, void *v) -{ - int i = (int) v - 1; - int err = 0; - unsigned int pvr; - unsigned short maj, min; - unsigned long lpj; - - if (i >= NR_CPUS) { - /* Show summary information */ -#ifdef CONFIG_SMP - unsigned long bogosum = 0; - for (i = 0; i < NR_CPUS; ++i) - if (cpu_online(i)) - bogosum += cpu_data[i].loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP */ - - if (ppc_md.show_cpuinfo != NULL) - err = ppc_md.show_cpuinfo(m); - return err; - } - -#ifdef CONFIG_SMP - if (!cpu_online(i)) - return 0; - pvr = cpu_data[i].pvr; - lpj = cpu_data[i].loops_per_jiffy; -#else - pvr = mfspr(SPRN_PVR); - lpj = loops_per_jiffy; -#endif - - seq_printf(m, "processor\t: %d\n", i); - seq_printf(m, "cpu\t\t: "); - - if (cur_cpu_spec->pvr_mask) - seq_printf(m, "%s", cur_cpu_spec->cpu_name); - else - seq_printf(m, "unknown (%08x)", pvr); -#ifdef CONFIG_ALTIVEC - if (cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC) - seq_printf(m, ", altivec supported"); -#endif - seq_printf(m, "\n"); - -#ifdef CONFIG_TAU - if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { -#ifdef CONFIG_TAU_AVERAGE - /* more straightforward, but potentially misleading */ - seq_printf(m, "temperature \t: %u C (uncalibrated)\n", - cpu_temp(i)); -#else - /* show the actual temp sensor range */ - u32 temp; - temp = cpu_temp_both(i); - seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", - temp & 0xff, temp >> 16); -#endif - } -#endif /* CONFIG_TAU */ - - if (ppc_md.show_percpuinfo != NULL) { - err = ppc_md.show_percpuinfo(m, i); - if (err) - return err; - } - - /* If we are a Freescale core do a simple check so - * we dont have to keep adding cases in the future */ - if ((PVR_VER(pvr) & 0x8000) == 0x8000) { - maj = PVR_MAJ(pvr); - min = PVR_MIN(pvr); - } else { - switch (PVR_VER(pvr)) { - case 0x0020: /* 403 family */ - maj = PVR_MAJ(pvr) + 1; - min = PVR_MIN(pvr); - break; - case 0x1008: /* 740P/750P ?? 
*/ - maj = ((pvr >> 8) & 0xFF) - 1; - min = pvr & 0xFF; - break; - default: - maj = (pvr >> 8) & 0xFF; - min = pvr & 0xFF; - break; - } - } - - seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", - maj, min, PVR_VER(pvr), PVR_REV(pvr)); - - seq_printf(m, "bogomips\t: %lu.%02lu\n", - lpj / (500000/HZ), (lpj / (5000/HZ)) % 100); - -#if USES_PPC_SYS - if (cur_ppc_sys_spec->ppc_sys_name) - seq_printf(m, "chipset\t\t: %s\n", - cur_ppc_sys_spec->ppc_sys_name); -#endif - -#ifdef CONFIG_SMP - seq_printf(m, "\n"); -#endif - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - int i = *pos; - - return i <= NR_CPUS? (void *) (i + 1): NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - -/* - * We're called here very early in the boot. We determine the machine - * type and call the appropriate low-level setup functions. - * -- Cort - * - * Note that the kernel may be running at an address which is different - * from the address that it was linked at, so we must use RELOC/PTRRELOC - * to access static data (including strings). -- paulus - */ -unsigned long __init early_init(unsigned long dt_ptr) -{ - unsigned long offset = reloc_offset(); - - reloc_got2(offset); - - /* - * Identify the CPU type and fix up code sections - * that depend on which cpu we have. - */ - identify_cpu(offset, 0); - do_cpu_ftr_fixups(offset); - -#ifdef CONFIG_BOOTX_TEXT - btext_prepare_BAT(); -#endif - - reloc_got2(-offset); - - return KERNELBASE + offset; -} - -#ifdef CONFIG_PPC_OF -/* - * Assume here that all clock rates are the same in a - * smp system. -- Cort - */ -int -of_show_percpuinfo(struct seq_file *m, int i) -{ - struct device_node *cpu_node; - u32 *fp; - int s; - - cpu_node = find_type_devices("cpu"); - if (!cpu_node) - return 0; - for (s = 0; s < i && cpu_node->next; s++) - cpu_node = cpu_node->next; - fp = (u32 *)get_property(cpu_node, "clock-frequency", NULL); - if (fp) - seq_printf(m, "clock\t\t: %dMHz\n", *fp / 1000000); - return 0; -} - -void __init -intuit_machine_type(void) -{ - char *model; - struct device_node *root; - - /* ask the OF info if we're a chrp or pmac */ - root = find_path_device("/"); - if (root != 0) { - /* assume pmac unless proven to be chrp -- Cort */ - _machine = _MACH_Pmac; - model = get_property(root, "device_type", NULL); - if (model && !strncmp("chrp", model, 4)) - _machine = _MACH_chrp; - else { - model = get_property(root, "model", NULL); - if (model && !strncmp(model, "IBM", 3)) - _machine = _MACH_chrp; - } - } -} -#endif - -#ifdef CONFIG_PPC_MULTIPLATFORM -/* - * The PPC_MULTIPLATFORM version of platform_init... - */ -void __init platform_init(void) -{ - /* if we didn't get any bootinfo telling us what we are... 
*/ - if (_machine == 0) { - /* prep boot loader tells us if we're prep or not */ - if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) ) - _machine = _MACH_prep; - } - -#ifdef CONFIG_PPC_PREP - /* not much more to do here, if prep */ - if (_machine == _MACH_prep) { - prep_init(); - return; - } -#endif - -#ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { - extern int __adb_probe_sync; - __adb_probe_sync = 1; - } -#endif /* CONFIG_ADB */ - - switch (_machine) { -#ifdef CONFIG_PPC_PMAC - case _MACH_Pmac: - pmac_init(); - break; -#endif -#ifdef CONFIG_PPC_CHRP - case _MACH_chrp: - chrp_init(); - break; -#endif - } -} - -#ifdef CONFIG_SERIAL_CORE_CONSOLE -extern char *of_stdout_device; - -static int __init set_preferred_console(void) -{ - struct device_node *prom_stdout; - char *name; - int offset = 0; - - if (of_stdout_device == NULL) - return -ENODEV; - - /* The user has requested a console so this is already set up. */ - if (strstr(saved_command_line, "console=")) - return -EBUSY; - - prom_stdout = find_path_device(of_stdout_device); - if (!prom_stdout) - return -ENODEV; - - name = (char *)get_property(prom_stdout, "name", NULL); - if (!name) - return -ENODEV; - - if (strcmp(name, "serial") == 0) { - int i; - u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i); - if (i > 8) { - switch (reg[1]) { - case 0x3f8: - offset = 0; - break; - case 0x2f8: - offset = 1; - break; - case 0x898: - offset = 2; - break; - case 0x890: - offset = 3; - break; - default: - /* We dont recognise the serial port */ - return -ENODEV; - } - } - } else if (strcmp(name, "ch-a") == 0) - offset = 0; - else if (strcmp(name, "ch-b") == 0) - offset = 1; - else - return -ENODEV; - return add_preferred_console("ttyS", offset, NULL); -} -console_initcall(set_preferred_console); -#endif /* CONFIG_SERIAL_CORE_CONSOLE */ -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -struct bi_record *find_bootinfo(void) -{ - struct bi_record *rec; - - rec = (struct bi_record *)_ALIGN((ulong)__bss_start+(1<<20)-1,(1<<20)); - if ( rec->tag != BI_FIRST ) { - /* - * This 0x10000 offset is a terrible hack but it will go away when - * we have the bootloader handle all the relocation and - * prom calls -- Cort - */ - rec = (struct bi_record *)_ALIGN((ulong)__bss_start+0x10000+(1<<20)-1,(1<<20)); - if ( rec->tag != BI_FIRST ) - return NULL; - } - return rec; -} - -/* - * Find out what kind of machine we're on and save any data we need - * from the early boot process (devtree is copied on pmac by prom_init()). - * This is called very early on the boot process, after a minimal - * MMU environment has been set up but before MMU_init is called. 
- */ -void __init machine_init(unsigned long dt_ptr, unsigned long phys) -{ - early_init_devtree(__va(dt_ptr)); - -#ifdef CONFIG_CMDLINE - strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); -#endif /* CONFIG_CMDLINE */ - -#ifdef CONFIG_6xx - ppc_md.power_save = ppc6xx_idle; -#endif -#ifdef CONFIG_POWER4 - ppc_md.power_save = power4_idle; -#endif - - platform_init(); - - if (ppc_md.progress) - ppc_md.progress("id mach(): done", 0x200); -} - -#ifdef CONFIG_BOOKE_WDT -/* Checks wdt=x and wdt_period=xx command-line option */ -int __init early_parse_wdt(char *p) -{ - if (p && strncmp(p, "0", 1) != 0) - booke_wdt_enabled = 1; - - return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period (char *p) -{ - if (p) - booke_wdt_period = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif /* CONFIG_BOOKE_WDT */ - -/* Checks "l2cr=xxxx" command-line option */ -int __init ppc_setup_l2cr(char *str) -{ - if (cpu_has_feature(CPU_FTR_L2CR)) { - unsigned long val = simple_strtoul(str, NULL, 0); - printk(KERN_INFO "l2cr set to %lx\n", val); - _set_L2CR(0); /* force invalidate by disable cache */ - _set_L2CR(val); /* and enable it */ - } - return 1; -} -__setup("l2cr=", ppc_setup_l2cr); - -#ifdef CONFIG_GENERIC_NVRAM - -/* Generic nvram hooks used by drivers/char/gen_nvram.c */ -unsigned char nvram_read_byte(int addr) -{ - if (ppc_md.nvram_read_val) - return ppc_md.nvram_read_val(addr); - return 0xff; -} -EXPORT_SYMBOL(nvram_read_byte); - -void nvram_write_byte(unsigned char val, int addr) -{ - if (ppc_md.nvram_write_val) - ppc_md.nvram_write_val(addr, val); -} -EXPORT_SYMBOL(nvram_write_byte); - -void nvram_sync(void) -{ - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); -} -EXPORT_SYMBOL(nvram_sync); - -#endif /* CONFIG_NVRAM */ - -static struct cpu cpu_devices[NR_CPUS]; - -int __init ppc_init(void) -{ - int i; - - /* clear the progress line */ - if ( ppc_md.progress ) ppc_md.progress(" ", 0xffff); - - /* register CPU devices */ - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) - register_cpu(&cpu_devices[i], i, NULL); - - /* call platform init */ - if (ppc_md.init != NULL) { - ppc_md.init(); - } - return 0; -} - -arch_initcall(ppc_init); - -/* Warning, IO base is not yet inited */ -void __init setup_arch(char **cmdline_p) -{ - extern char *klimit; - extern void do_init_bootmem(void); - - /* so udelay does something sensible, assume <= 1000 bogomips */ - loops_per_jiffy = 500000000 / HZ; - -#ifdef CONFIG_BOOTX_TEXT - map_boot_text(); -#endif - - unflatten_device_tree(); - finish_device_tree(); - -#ifdef CONFIG_PPC_MULTIPLATFORM - /* This could be called "early setup arch", it must be done - * now because xmon need it - */ - if (_machine == _MACH_Pmac) - pmac_feature_init(); /* New cool way */ -#endif - -#ifdef CONFIG_XMON - xmon_map_scc(); - if (strstr(cmd_line, "xmon")) - xmon(NULL); -#endif /* CONFIG_XMON */ - if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab); - -#if defined(CONFIG_KGDB) - if (ppc_md.kgdb_map_scc) - ppc_md.kgdb_map_scc(); - set_debug_traps(); - if (strstr(cmd_line, "gdb")) { - if (ppc_md.progress) - ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); - printk("kgdb breakpoint activated\n"); - breakpoint(); - } -#endif - - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. 
- */ - if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { - dcache_bsize = cur_cpu_spec->dcache_bsize; - icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - } else - ucache_bsize = dcache_bsize = icache_bsize - = cur_cpu_spec->dcache_bsize; - - /* reboot on panic */ - panic_timeout = 180; - - init_mm.start_code = PAGE_OFFSET; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (unsigned long) klimit; - - /* Save unparsed command line copy for /proc/cmdline */ - strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); - *cmdline_p = cmd_line; - - parse_early_param(); - - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); - -#ifdef CONFIG_PPC_OCP - /* Initialize OCP device list */ - ocp_early_init(); - if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab); -#endif - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - ppc_md.setup_arch(); - if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); - - paging_init(); - - /* this is for modules since _machine can be a define -- Cort */ - ppc_md.ppc_machine = _machine; -} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c new file mode 100644 index 00000000000..27d7f828212 --- /dev/null +++ b/arch/powerpc/kernel/setup_32.c @@ -0,0 +1,678 @@ +/* + * Common prep/pmac/chrp boot and setup code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define USES_PPC_SYS (defined(CONFIG_85xx) || defined(CONFIG_83xx) || \ + defined(CONFIG_MPC10X_BRIDGE) || defined(CONFIG_8260) || \ + defined(CONFIG_PPC_MPC52xx)) + +#if USES_PPC_SYS +#include +#endif + +#if defined CONFIG_KGDB +#include +#endif + +extern void platform_init(void); +extern void bootx_init(unsigned long r4, unsigned long phys); + +extern void ppc6xx_idle(void); +extern void power4_idle(void); + +boot_infos_t *boot_infos; +struct ide_machdep_calls ppc_ide_md; + +/* Used with the BI_MEMSIZE bootinfo parameter to store the memory + size value reported by the boot loader. */ +unsigned long boot_mem_size; + +unsigned long ISA_DMA_THRESHOLD; +unsigned int DMA_MODE_READ; +unsigned int DMA_MODE_WRITE; + +#ifdef CONFIG_PPC_MULTIPLATFORM +int _machine = 0; + +extern void prep_init(void); +extern void pmac_init(void); +extern void chrp_init(void); + +dev_t boot_dev; +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned long SYSRQ_KEY = 0x54; +#endif /* CONFIG_MAGIC_SYSRQ */ + +#ifdef CONFIG_VGA_CONSOLE +unsigned long vgacon_remap_base; +#endif + +struct machdep_calls ppc_md; + +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. 
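+ * Roughly, the ELF loader emits them along these lines (an
+ * illustrative sketch; the real entries are the ARCH_DLINFO
+ * definitions in asm/elf.h):
+ *
+ *	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);
+ *	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);
+ *	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);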
+ */
+int dcache_bsize;
+int icache_bsize;
+int ucache_bsize;
+
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_FB_VGA16) || \
+    defined(CONFIG_FB_VGA16_MODULE) || defined(CONFIG_FB_VESA)
+struct screen_info screen_info = {
+	0, 25,			/* orig-x, orig-y */
+	0,			/* unused */
+	0,			/* orig-video-page */
+	0,			/* orig-video-mode */
+	80,			/* orig-video-cols */
+	0,0,0,			/* ega_ax, ega_bx, ega_cx */
+	25,			/* orig-video-lines */
+	1,			/* orig-video-isVGA */
+	16			/* orig-video-points */
+};
+#endif /* CONFIG_VGA_CONSOLE || CONFIG_FB_VGA16 || CONFIG_FB_VESA */
+
+void machine_restart(char *cmd)
+{
+#ifdef CONFIG_NVRAM
+	nvram_sync();
+#endif
+	ppc_md.restart(cmd);
+}
+
+void machine_power_off(void)
+{
+#ifdef CONFIG_NVRAM
+	nvram_sync();
+#endif
+	ppc_md.power_off();
+}
+
+void machine_halt(void)
+{
+#ifdef CONFIG_NVRAM
+	nvram_sync();
+#endif
+	ppc_md.halt();
+}
+
+void (*pm_power_off)(void) = machine_power_off;
+
+#ifdef CONFIG_TAU
+extern u32 cpu_temp(unsigned long cpu);
+extern u32 cpu_temp_both(unsigned long cpu);
+#endif /* CONFIG_TAU */
+
+int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int i = (int) v - 1;
+	int err = 0;
+	unsigned int pvr;
+	unsigned short maj, min;
+	unsigned long lpj;
+
+	if (i >= NR_CPUS) {
+		/* Show summary information */
+#ifdef CONFIG_SMP
+		unsigned long bogosum = 0;
+		for (i = 0; i < NR_CPUS; ++i)
+			if (cpu_online(i))
+				bogosum += cpu_data[i].loops_per_jiffy;
+		seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+			   bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
+#endif /* CONFIG_SMP */
+
+		if (ppc_md.show_cpuinfo != NULL)
+			err = ppc_md.show_cpuinfo(m);
+		return err;
+	}
+
+#ifdef CONFIG_SMP
+	if (!cpu_online(i))
+		return 0;
+	pvr = cpu_data[i].pvr;
+	lpj = cpu_data[i].loops_per_jiffy;
+#else
+	pvr = mfspr(SPRN_PVR);
+	lpj = loops_per_jiffy;
+#endif
+
+	seq_printf(m, "processor\t: %d\n", i);
+	seq_printf(m, "cpu\t\t: ");
+
+	if (cur_cpu_spec->pvr_mask)
+		seq_printf(m, "%s", cur_cpu_spec->cpu_name);
+	else
+		seq_printf(m, "unknown (%08x)", pvr);
+#ifdef CONFIG_ALTIVEC
+	if (cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC)
+		seq_printf(m, ", altivec supported");
+#endif
+	seq_printf(m, "\n");
+
+#ifdef CONFIG_TAU
+	if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
+#ifdef CONFIG_TAU_AVERAGE
+		/* more straightforward, but potentially misleading */
+		seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+			   cpu_temp(i));
+#else
+		/* show the actual temp sensor range */
+		u32 temp;
+		temp = cpu_temp_both(i);
+		seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+			   temp & 0xff, temp >> 16);
+#endif
+	}
+#endif /* CONFIG_TAU */
+
+	if (ppc_md.show_percpuinfo != NULL) {
+		err = ppc_md.show_percpuinfo(m, i);
+		if (err)
+			return err;
+	}
+
+	/* If we are a Freescale core do a simple check so
+	 * we don't have to keep adding cases in the future */
+	if ((PVR_VER(pvr) & 0x8000) == 0x8000) {
+		maj = PVR_MAJ(pvr);
+		min = PVR_MIN(pvr);
+	} else {
+		switch (PVR_VER(pvr)) {
+		case 0x0020:	/* 403 family */
+			maj = PVR_MAJ(pvr) + 1;
+			min = PVR_MIN(pvr);
+			break;
+		case 0x1008:	/* 740P/750P ??
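+			   (example: a 740/750 with pvr 0x10080202
+			    would report maj 1, min 2 under the rule
+			    below; an illustrative value only)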
*/ + maj = ((pvr >> 8) & 0xFF) - 1; + min = pvr & 0xFF; + break; + default: + maj = (pvr >> 8) & 0xFF; + min = pvr & 0xFF; + break; + } + } + + seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", + maj, min, PVR_VER(pvr), PVR_REV(pvr)); + + seq_printf(m, "bogomips\t: %lu.%02lu\n", + lpj / (500000/HZ), (lpj / (5000/HZ)) % 100); + +#if USES_PPC_SYS + if (cur_ppc_sys_spec->ppc_sys_name) + seq_printf(m, "chipset\t\t: %s\n", + cur_ppc_sys_spec->ppc_sys_name); +#endif + +#ifdef CONFIG_SMP + seq_printf(m, "\n"); +#endif + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + int i = *pos; + + return i <= NR_CPUS? (void *) (i + 1): NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start =c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* + * We're called here very early in the boot. We determine the machine + * type and call the appropriate low-level setup functions. + * -- Cort + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). -- paulus + */ +unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + + reloc_got2(offset); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + identify_cpu(offset, 0); + do_cpu_ftr_fixups(offset); + +#ifdef CONFIG_BOOTX_TEXT + btext_prepare_BAT(); +#endif + + reloc_got2(-offset); + + return KERNELBASE + offset; +} + +#ifdef CONFIG_PPC_OF +/* + * Assume here that all clock rates are the same in a + * smp system. -- Cort + */ +int +of_show_percpuinfo(struct seq_file *m, int i) +{ + struct device_node *cpu_node; + u32 *fp; + int s; + + cpu_node = find_type_devices("cpu"); + if (!cpu_node) + return 0; + for (s = 0; s < i && cpu_node->next; s++) + cpu_node = cpu_node->next; + fp = (u32 *)get_property(cpu_node, "clock-frequency", NULL); + if (fp) + seq_printf(m, "clock\t\t: %dMHz\n", *fp / 1000000); + return 0; +} + +void __init +intuit_machine_type(void) +{ + char *model; + struct device_node *root; + + /* ask the OF info if we're a chrp or pmac */ + root = find_path_device("/"); + if (root != 0) { + /* assume pmac unless proven to be chrp -- Cort */ + _machine = _MACH_Pmac; + model = get_property(root, "device_type", NULL); + if (model && !strncmp("chrp", model, 4)) + _machine = _MACH_chrp; + else { + model = get_property(root, "model", NULL); + if (model && !strncmp(model, "IBM", 3)) + _machine = _MACH_chrp; + } + } +} +#endif + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* + * The PPC_MULTIPLATFORM version of platform_init... + */ +void __init platform_init(void) +{ + /* if we didn't get any bootinfo telling us what we are... 
 */
+ if (_machine == 0) {
+ /* prep boot loader tells us if we're prep or not */
+ if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) )
+ _machine = _MACH_prep;
+ }
+
+#ifdef CONFIG_PPC_PREP
+ /* not much more to do here, if prep */
+ if (_machine == _MACH_prep) {
+ prep_init();
+ return;
+ }
+#endif
+
+#ifdef CONFIG_ADB
+ if (strstr(cmd_line, "adb_sync")) {
+ extern int __adb_probe_sync;
+ __adb_probe_sync = 1;
+ }
+#endif /* CONFIG_ADB */
+
+ switch (_machine) {
+#ifdef CONFIG_PPC_PMAC
+ case _MACH_Pmac:
+ pmac_init();
+ break;
+#endif
+#ifdef CONFIG_PPC_CHRP
+ case _MACH_chrp:
+ chrp_init();
+ break;
+#endif
+ }
+}
+
+#ifdef CONFIG_SERIAL_CORE_CONSOLE
+extern char *of_stdout_device;
+
+static int __init set_preferred_console(void)
+{
+ struct device_node *prom_stdout;
+ char *name;
+ int offset = 0;
+
+ if (of_stdout_device == NULL)
+ return -ENODEV;
+
+ /* The user has requested a console so this is already set up. */
+ if (strstr(saved_command_line, "console="))
+ return -EBUSY;
+
+ prom_stdout = find_path_device(of_stdout_device);
+ if (!prom_stdout)
+ return -ENODEV;
+
+ name = (char *)get_property(prom_stdout, "name", NULL);
+ if (!name)
+ return -ENODEV;
+
+ if (strcmp(name, "serial") == 0) {
+ int i;
+ u32 *reg = (u32 *)get_property(prom_stdout, "reg", &i);
+ if (i > 8) {
+ switch (reg[1]) {
+ case 0x3f8:
+ offset = 0;
+ break;
+ case 0x2f8:
+ offset = 1;
+ break;
+ case 0x898:
+ offset = 2;
+ break;
+ case 0x890:
+ offset = 3;
+ break;
+ default:
+ /* We don't recognise the serial port */
+ return -ENODEV;
+ }
+ }
+ } else if (strcmp(name, "ch-a") == 0)
+ offset = 0;
+ else if (strcmp(name, "ch-b") == 0)
+ offset = 1;
+ else
+ return -ENODEV;
+ return add_preferred_console("ttyS", offset, NULL);
+}
+console_initcall(set_preferred_console);
+#endif /* CONFIG_SERIAL_CORE_CONSOLE */
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+struct bi_record *find_bootinfo(void)
+{
+ struct bi_record *rec;
+
+ rec = (struct bi_record *)_ALIGN((ulong)__bss_start+(1<<20)-1,(1<<20));
+ if ( rec->tag != BI_FIRST ) {
+ /*
+ * This 0x10000 offset is a terrible hack but it will go away when
+ * we have the bootloader handle all the relocation and
+ * prom calls -- Cort
+ */
+ rec = (struct bi_record *)_ALIGN((ulong)__bss_start+0x10000+(1<<20)-1,(1<<20));
+ if ( rec->tag != BI_FIRST )
+ return NULL;
+ }
+ return rec;
+}
+
+/*
+ * Find out what kind of machine we're on and save any data we need
+ * from the early boot process (devtree is copied on pmac by prom_init()).
+ * This is called very early in the boot process, after a minimal
+ * MMU environment has been set up but before MMU_init is called. 
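+ *
+ * (editor's note, for orientation: machine_init() here runs before
+ * MMU_init(), which in turn runs before the generic setup_arch()
+ * defined further down in this file)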
+ */
+void __init machine_init(unsigned long dt_ptr, unsigned long phys)
+{
+ early_init_devtree(__va(dt_ptr));
+
+#ifdef CONFIG_CMDLINE
+ strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line));
+#endif /* CONFIG_CMDLINE */
+
+#ifdef CONFIG_6xx
+ ppc_md.power_save = ppc6xx_idle;
+#endif
+#ifdef CONFIG_POWER4
+ ppc_md.power_save = power4_idle;
+#endif
+
+ platform_init();
+
+ if (ppc_md.progress)
+ ppc_md.progress("id mach(): done", 0x200);
+}
+
+#ifdef CONFIG_BOOKE_WDT
+/* Checks the wdt=x and wdt_period=xx command-line options */
+int __init early_parse_wdt(char *p)
+{
+ if (p && strncmp(p, "0", 1) != 0)
+ booke_wdt_enabled = 1;
+
+ return 0;
+}
+early_param("wdt", early_parse_wdt);
+
+int __init early_parse_wdt_period(char *p)
+{
+ if (p)
+ booke_wdt_period = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("wdt_period", early_parse_wdt_period);
+#endif /* CONFIG_BOOKE_WDT */
+
+/* Checks "l2cr=xxxx" command-line option */
+int __init ppc_setup_l2cr(char *str)
+{
+ if (cpu_has_feature(CPU_FTR_L2CR)) {
+ unsigned long val = simple_strtoul(str, NULL, 0);
+ printk(KERN_INFO "l2cr set to %lx\n", val);
+ _set_L2CR(0); /* force invalidate by disable cache */
+ _set_L2CR(val); /* and enable it */
+ }
+ return 1;
+}
+__setup("l2cr=", ppc_setup_l2cr);
+
+#ifdef CONFIG_GENERIC_NVRAM
+
+/* Generic nvram hooks used by drivers/char/gen_nvram.c */
+unsigned char nvram_read_byte(int addr)
+{
+ if (ppc_md.nvram_read_val)
+ return ppc_md.nvram_read_val(addr);
+ return 0xff;
+}
+EXPORT_SYMBOL(nvram_read_byte);
+
+void nvram_write_byte(unsigned char val, int addr)
+{
+ if (ppc_md.nvram_write_val)
+ ppc_md.nvram_write_val(addr, val);
+}
+EXPORT_SYMBOL(nvram_write_byte);
+
+void nvram_sync(void)
+{
+ if (ppc_md.nvram_sync)
+ ppc_md.nvram_sync();
+}
+EXPORT_SYMBOL(nvram_sync);
+
+#endif /* CONFIG_GENERIC_NVRAM */
+
+static struct cpu cpu_devices[NR_CPUS];
+
+int __init ppc_init(void)
+{
+ int i;
+
+ /* clear the progress line */
+ if ( ppc_md.progress ) ppc_md.progress(" ", 0xffff);
+
+ /* register CPU devices */
+ for (i = 0; i < NR_CPUS; i++)
+ if (cpu_possible(i))
+ register_cpu(&cpu_devices[i], i, NULL);
+
+ /* call platform init */
+ if (ppc_md.init != NULL) {
+ ppc_md.init();
+ }
+ return 0;
+}
+
+arch_initcall(ppc_init);
+
+/* Warning, IO base is not yet initialized */
+void __init setup_arch(char **cmdline_p)
+{
+ extern char *klimit;
+ extern void do_init_bootmem(void);
+
+ /* so udelay does something sensible, assume <= 1000 bogomips */
+ loops_per_jiffy = 500000000 / HZ;
+
+#ifdef CONFIG_BOOTX_TEXT
+ map_boot_text();
+#endif
+
+ unflatten_device_tree();
+ finish_device_tree();
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+ /* This could be called "early setup arch"; it must be done
+ * now because xmon needs it
+ */
+ if (_machine == _MACH_Pmac)
+ pmac_feature_init(); /* New cool way */
+#endif
+
+#ifdef CONFIG_XMON
+ xmon_map_scc();
+ if (strstr(cmd_line, "xmon"))
+ xmon(NULL);
+#endif /* CONFIG_XMON */
+ if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+
+#if defined(CONFIG_KGDB)
+ if (ppc_md.kgdb_map_scc)
+ ppc_md.kgdb_map_scc();
+ set_debug_traps();
+ if (strstr(cmd_line, "gdb")) {
+ if (ppc_md.progress)
+ ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
+ printk("kgdb breakpoint activated\n");
+ breakpoint();
+ }
+#endif
+
+ /*
+ * Set cache line size based on type of cpu as a default.
+ * Systems with OF can look in the properties on the cpu node(s)
+ * for a possibly more accurate value. 
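+ *
+ * (editor's sketch, illustrative only: code that walks memory a
+ * cache line at a time would step by these values, e.g.
+ *	for (p = start & ~(dcache_bsize - 1); p < stop; p += dcache_bsize)
+ *		flush_dcache_line(p);
+ * where flush_dcache_line() is a hypothetical per-line primitive,
+ * not something defined in this patch)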
+ */ + if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + } else + ucache_bsize = dcache_bsize = icache_bsize + = cur_cpu_spec->dcache_bsize; + + /* reboot on panic */ + panic_timeout = 180; + + init_mm.start_code = PAGE_OFFSET; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = (unsigned long) klimit; + + /* Save unparsed command line copy for /proc/cmdline */ + strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE); + *cmdline_p = cmd_line; + + parse_early_param(); + + /* set up the bootmem stuff with available memory */ + do_init_bootmem(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + +#ifdef CONFIG_PPC_OCP + /* Initialize OCP device list */ + ocp_early_init(); + if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab); +#endif + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + ppc_md.setup_arch(); + if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); + + paging_init(); + + /* this is for modules since _machine can be a define -- Cort */ + ppc_md.ppc_machine = _machine; +} diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 347f9798e43..a8cedb96de5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -3,7 +3,7 @@ # obj-y := strcase.o string.o -obj-$(CONFIG_PPC32) += div64.o copy32.o checksum.o -obj-$(CONFIG_PPC64) += copypage.o copyuser.o memcpy.o usercopy.o \ - sstep.o checksum64.o +obj-$(CONFIG_PPC32) += div64.o copy_32.o checksum_32.o +obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o memcpy_64.o \ + usercopy_64.o sstep.o checksum_64.o mem_64.o obj-$(CONFIG_PPC_ISERIES) += e2a.o diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S deleted file mode 100644 index 7874e8a8045..00000000000 --- a/arch/powerpc/lib/checksum.S +++ /dev/null @@ -1,225 +0,0 @@ -/* - * This file contains assembly-language implementations - * of IP-style 1's complement checksum routines. - * - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). - */ - -#include -#include -#include -#include - - .text - -/* - * ip_fast_csum(buf, len) -- Optimized for IP header - * len is in words and is always >= 5. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. 
r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(saddr, daddr, len, proto, sum) - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addc r0,r3,r4 /* add 4 32-bit words together */ - adde r0,r0,r5 - adde r0,r0,r7 - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * csum_partial(buff, len, sum) - */ -_GLOBAL(csum_partial) - addic r0,r5,0 - subi r3,r3,4 - srwi. r6,r4,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r5,r3,2 /* Align buffer to longword boundary */ - beq+ 1f - lhz r5,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r0,r0,r5 - srwi. r6,r4,2 /* # words to do */ - beq 3f -1: mtctr r6 -2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ - adde r0,r0,r5 /* be unnecessary to unroll this loop */ - bdnz 2b - andi. r4,r4,3 -3: cmpwi 0,r4,2 - blt+ 4f - lhz r5,4(r3) - addi r3,r3,2 - subi r4,r4,2 - adde r0,r0,r5 -4: cmpwi 0,r4,1 - bne+ 5f - lbz r5,4(r3) - slwi r5,r5,8 /* Upper byte of word */ - adde r0,r0,r5 -5: addze r3,r0 /* add in final carry */ - blr - -/* - * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) - */ -_GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: srwi. r6,r5,4 /* # groups of 4 words to do */ - beq 10f - mtctr r6 -71: lwz r6,4(r3) -72: lwz r9,8(r3) -73: lwz r10,12(r3) -74: lwzu r11,16(r3) - adde r0,r0,r6 -75: stw r6,4(r4) - adde r0,r0,r9 -76: stw r9,8(r4) - adde r0,r0,r10 -77: stw r10,12(r4) - adde r0,r0,r11 -78: stwu r11,16(r4) - bdnz 71b -10: rlwinm. r6,r5,30,30,31 /* # words left to do */ - beq 13f - mtctr r6 -82: lwzu r9,4(r3) -92: stwu r9,4(r4) - adde r0,r0,r9 - bdnz 82b -13: andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) - addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) - addi r4,r4,2 - adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ - adde r0,r0,r6 -5: addze r3,r0 /* add in final carry */ - blr - -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ - -src_error_4: - mfctr r6 /* update # bytes remaining from ctr */ - rlwimi r5,r6,4,0,27 - b 79f -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 -79: srwi. r6,r5,2 - beq 3f - mtctr r6 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. 
r5,r5,3 - beq src_error -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b -src_error: - cmpwi 0,r7,0 - beq 1f - li r6,-EFAULT - stw r6,0(r7) -1: addze r3,r0 - blr - -dst_error: - cmpwi 0,r8,0 - beq 1f - li r6,-EFAULT - stw r6,0(r8) -1: addze r3,r0 - blr - -.section __ex_table,"a" - .long 81b,src_error_1 - .long 91b,dst_error - .long 71b,src_error_4 - .long 72b,src_error_4 - .long 73b,src_error_4 - .long 74b,src_error_4 - .long 75b,dst_error - .long 76b,dst_error - .long 77b,dst_error - .long 78b,dst_error - .long 82b,src_error_2 - .long 92b,dst_error - .long 83b,src_error_3 - .long 93b,dst_error - .long 84b,src_error_3 - .long 94b,dst_error - .long 95b,dst_error - .long 96b,dst_error - .long 97b,dst_error diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S deleted file mode 100644 index ef96c6c58ef..00000000000 --- a/arch/powerpc/lib/checksum64.S +++ /dev/null @@ -1,229 +0,0 @@ -/* - * This file contains assembly-language implementations - * of IP-style 1's complement checksum routines. - * - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). - */ - -#include -#include -#include -#include - -/* - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header - * len is in words and is always >= 5. - * - * In practice len == 5, but this is not guaranteed. So this code does not - * attempt to use doubleword instructions. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rldicl r4,r0,32,0 /* fold two 32-bit halves together */ - add r0,r0,r4 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) - * No real gain trying to do this specially for 64 bit, but - * the 32 bit addition may spill into the upper bits of - * the doubleword so we still must fold it down from 64. - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addc r0,r3,r4 /* add 4 32-bit words together */ - adde r0,r0,r5 - adde r0,r0,r7 - rldicl r4,r0,32,0 /* fold 64 bit value */ - add r0,r4,r0 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit). - * - * This code assumes at least halfword alignment, though the length - * can be any number of bytes. The sum is accumulated in r5. - * - * csum_partial(r3=buff, r4=len, r5=sum) - */ -_GLOBAL(csum_partial) - subi r3,r3,8 /* we'll offset by 8 for the loads */ - srdi. r6,r4,3 /* divide by 8 for doubleword count */ - addic r5,r5,0 /* clear carry */ - beq 3f /* if we're doing < 8 bytes */ - andi. r0,r3,2 /* aligned on a word boundary already? */ - beq+ 1f - lhz r6,8(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r5,r5,r6 - srdi. r6,r4,3 /* recompute number of doublewords */ - beq 3f /* any left? 
*/ -1: mtctr r6 -2: ldu r6,8(r3) /* main sum loop */ - adde r5,r5,r6 - bdnz 2b - andi. r4,r4,7 /* compute bytes left to sum after doublewords */ -3: cmpwi 0,r4,4 /* is at least a full word left? */ - blt 4f - lwz r6,8(r3) /* sum this word */ - addi r3,r3,4 - subi r4,r4,4 - adde r5,r5,r6 -4: cmpwi 0,r4,2 /* is at least a halfword left? */ - blt+ 5f - lhz r6,8(r3) /* sum this halfword */ - addi r3,r3,2 - subi r4,r4,2 - adde r5,r5,r6 -5: cmpwi 0,r4,1 /* is at least a byte left? */ - bne+ 6f - lbz r6,8(r3) /* sum this byte */ - slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ - adde r5,r5,r6 -6: addze r5,r5 /* add in final carry */ - rldicl r4,r5,32,0 /* fold two 32-bit halves together */ - add r3,r4,r5 - srdi r3,r3,32 - blr - -/* - * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * This code needs to be reworked to take advantage of 64 bit sum+copy. - * However, due to tokenring halfword alignment problems this will be very - * tricky. For now we'll leave it until we instrument it somehow. - * - * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) - */ -_GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: mtctr r6 -82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ -92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ - adde r0,r0,r6 - bdnz 82b - andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) - addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) - addi r4,r4,2 - adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ - adde r0,r0,r6 -5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ - rldicl r4,r3,32,0 /* fold 64 bit value */ - add r3,r4,r3 - srdi r3,r3,32 - blr - -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ - - .globl src_error_1 -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 - srwi. r6,r5,2 - beq 3f - mtctr r6 - .globl src_error_2 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. 
r5,r5,3 - beq src_error - .globl src_error_3 -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b - .globl src_error -src_error: - cmpdi 0,r7,0 - beq 1f - li r6,-EFAULT - stw r6,0(r7) -1: addze r3,r0 - blr - - .globl dst_error -dst_error: - cmpdi 0,r8,0 - beq 1f - li r6,-EFAULT - stw r6,0(r8) -1: addze r3,r0 - blr - -.section __ex_table,"a" - .align 3 - .llong 81b,src_error_1 - .llong 91b,dst_error - .llong 82b,src_error_2 - .llong 92b,dst_error - .llong 83b,src_error_3 - .llong 93b,dst_error - .llong 84b,src_error_3 - .llong 94b,dst_error - .llong 95b,dst_error - .llong 96b,dst_error - .llong 97b,dst_error diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S new file mode 100644 index 00000000000..7874e8a8045 --- /dev/null +++ b/arch/powerpc/lib/checksum_32.S @@ -0,0 +1,225 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include + + .text + +/* + * ip_fast_csum(buf, len) -- Optimized for IP header + * len is in words and is always >= 5. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(saddr, daddr, len, proto, sum) + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * csum_partial(buff, len, sum) + */ +_GLOBAL(csum_partial) + addic r0,r5,0 + subi r3,r3,4 + srwi. r6,r4,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r5,r3,2 /* Align buffer to longword boundary */ + beq+ 1f + lhz r5,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r0,r0,r5 + srwi. r6,r4,2 /* # words to do */ + beq 3f +1: mtctr r6 +2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ + adde r0,r0,r5 /* be unnecessary to unroll this loop */ + bdnz 2b + andi. r4,r4,3 +3: cmpwi 0,r4,2 + blt+ 4f + lhz r5,4(r3) + addi r3,r3,2 + subi r4,r4,2 + adde r0,r0,r5 +4: cmpwi 0,r4,1 + bne+ 5f + lbz r5,4(r3) + slwi r5,r5,8 /* Upper byte of word */ + adde r0,r0,r5 +5: addze r3,r0 /* add in final carry */ + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. 
r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: srwi. r6,r5,4 /* # groups of 4 words to do */ + beq 10f + mtctr r6 +71: lwz r6,4(r3) +72: lwz r9,8(r3) +73: lwz r10,12(r3) +74: lwzu r11,16(r3) + adde r0,r0,r6 +75: stw r6,4(r4) + adde r0,r0,r9 +76: stw r9,8(r4) + adde r0,r0,r10 +77: stw r10,12(r4) + adde r0,r0,r11 +78: stwu r11,16(r4) + bdnz 71b +10: rlwinm. r6,r5,30,30,31 /* # words left to do */ + beq 13f + mtctr r6 +82: lwzu r9,4(r3) +92: stwu r9,4(r4) + adde r0,r0,r9 + bdnz 82b +13: andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry */ + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + +src_error_4: + mfctr r6 /* update # bytes remaining from ctr */ + rlwimi r5,r6,4,0,27 + b 79f +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 +79: srwi. r6,r5,2 + beq 3f + mtctr r6 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b +src_error: + cmpwi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + +dst_error: + cmpwi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .long 81b,src_error_1 + .long 91b,dst_error + .long 71b,src_error_4 + .long 72b,src_error_4 + .long 73b,src_error_4 + .long 74b,src_error_4 + .long 75b,dst_error + .long 76b,dst_error + .long 77b,dst_error + .long 78b,dst_error + .long 82b,src_error_2 + .long 92b,dst_error + .long 83b,src_error_3 + .long 93b,dst_error + .long 84b,src_error_3 + .long 94b,dst_error + .long 95b,dst_error + .long 96b,dst_error + .long 97b,dst_error diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S new file mode 100644 index 00000000000..ef96c6c58ef --- /dev/null +++ b/arch/powerpc/lib/checksum_64.S @@ -0,0 +1,229 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include +#include +#include +#include + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. 
r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? */ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? */ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpwi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpwi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpwi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. 
r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpdi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpdi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S deleted file mode 100644 index 420a912198a..00000000000 --- a/arch/powerpc/lib/copy32.S +++ /dev/null @@ -1,543 +0,0 @@ -/* - * Memory copy functions for 32-bit PowerPC. - * - * Copyright (C) 1996-2005 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include - -#define COPY_16_BYTES \ - lwz r7,4(r4); \ - lwz r8,8(r4); \ - lwz r9,12(r4); \ - lwzu r10,16(r4); \ - stw r7,4(r6); \ - stw r8,8(r6); \ - stw r9,12(r6); \ - stwu r10,16(r6) - -#define COPY_16_BYTES_WITHEX(n) \ -8 ## n ## 0: \ - lwz r7,4(r4); \ -8 ## n ## 1: \ - lwz r8,8(r4); \ -8 ## n ## 2: \ - lwz r9,12(r4); \ -8 ## n ## 3: \ - lwzu r10,16(r4); \ -8 ## n ## 4: \ - stw r7,4(r6); \ -8 ## n ## 5: \ - stw r8,8(r6); \ -8 ## n ## 6: \ - stw r9,12(r6); \ -8 ## n ## 7: \ - stwu r10,16(r6) - -#define COPY_16_BYTES_EXCODE(n) \ -9 ## n ## 0: \ - addi r5,r5,-(16 * n); \ - b 104f; \ -9 ## n ## 1: \ - addi r5,r5,-(16 * n); \ - b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text - - .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy32.S",N_SO,0,0,0f -0: - -CACHELINE_BYTES = L1_CACHE_LINE_SIZE -LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE -CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) - -/* - * Use dcbz on the complete cache lines in the destination - * to set them to zero. This requires that the destination - * area is cacheable. 
-- paulus - */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - clrlwi r7,r6,32-LG_CACHELINE_BYTES - add r8,r7,r5 - srwi r9,r8,LG_CACHELINE_BYTES - addic. r9,r9,-1 /* total number of complete cachelines */ - ble 2f - xori r0,r7,CACHELINE_MASK & ~3 - srwi. r0,r0,2 - beq 3f - mtctr r0 -4: stwu r4,4(r6) - bdnz 4b -3: mtctr r9 - li r7,4 -#if !defined(CONFIG_8xx) -10: dcbz r7,r6 -#else -10: stw r4, 4(r6) - stw r4, 8(r6) - stw r4, 12(r6) - stw r4, 16(r6) -#if CACHE_LINE_SIZE >= 32 - stw r4, 20(r6) - stw r4, 24(r6) - stw r4, 28(r6) - stw r4, 32(r6) -#endif /* CACHE_LINE_SIZE */ -#endif - addi r6,r6,CACHELINE_BYTES - bdnz 10b - clrlwi r5,r8,32-LG_CACHELINE_BYTES - addi r5,r5,4 -2: srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - -_GLOBAL(memset) - rlwimi r4,r4,8,16,23 - rlwimi r4,r4,16,0,15 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - -/* - * This version uses dcbz on the complete cache lines in the - * destination area to reduce memory traffic. This requires that - * the destination area is cacheable. - * We only use this version if the source and dest don't overlap. - * -- paulus. - */ -_GLOBAL(cacheable_memcpy) - add r7,r3,r5 /* test if the src & dst overlap */ - add r8,r4,r5 - cmplw 0,r4,r7 - cmplw 1,r3,r8 - crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ - - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - subf r5,r0,r5 - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ - stwu r9,4(r6) - bdnz 72b - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - mtctr r0 - beq 63f -53: -#if !defined(CONFIG_8xx) - dcbz r11,r6 -#endif - COPY_16_BYTES -#if L1_CACHE_LINE_SIZE >= 32 - COPY_16_BYTES -#if L1_CACHE_LINE_SIZE >= 64 - COPY_16_BYTES - COPY_16_BYTES -#if L1_CACHE_LINE_SIZE >= 128 - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES -#endif -#endif -#endif - bdnz 53b - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) - stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: blr - -_GLOBAL(memmove) - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - -_GLOBAL(memcpy) - srwi. r7,r5,3 - addi r6,r3,-4 - addi r4,r4,-4 - beq 2f /* if less than 8 bytes to do */ - andi. r0,r6,3 /* get dest word aligned */ - mtctr r7 - bne 5f -1: lwz r7,4(r4) - lwzu r8,8(r4) - stw r7,4(r6) - stwu r8,8(r6) - bdnz 1b - andi. 
r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,4(r4) - addi r5,r5,-4 - stwu r0,4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r4,r4,3 - addi r6,r6,3 -4: lbzu r0,1(r4) - stbu r0,1(r6) - bdnz 4b - blr -5: subfic r0,r0,4 - mtctr r0 -6: lbz r7,4(r4) - addi r4,r4,1 - stb r7,4(r6) - addi r6,r6,1 - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - -_GLOBAL(backwards_memcpy) - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ - add r6,r3,r5 - add r4,r4,r5 - beq 2f - andi. r0,r6,3 - mtctr r7 - bne 5f -1: lwz r7,-4(r4) - lwzu r8,-8(r4) - stw r7,-4(r6) - stwu r8,-8(r6) - bdnz 1b - andi. r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,-4(r4) - subi r5,r5,4 - stwu r0,-4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 -4: lbzu r0,-1(r4) - stbu r0,-1(r6) - bdnz 4b - blr -5: mtctr r0 -6: lbzu r7,-1(r4) - stbu r7,-1(r6) - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - -_GLOBAL(__copy_tofrom_user) - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ -71: stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: subf r5,r0,r5 - srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ -73: stwu r9,4(r6) - bdnz 72b - - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - beq 63f - -#ifdef CONFIG_8xx - /* Don't use prefetch on 8xx */ - mtctr r0 - li r0,0 -53: COPY_16_BYTES_WITHEX(0) - bdnz 53b - -#else /* not CONFIG_8xx */ - /* Here we decide how far ahead to prefetch the source */ - li r3,4 - cmpwi r0,1 - li r7,0 - ble 114f - li r7,1 -#if MAX_COPY_PREFETCH > 1 - /* Heuristically, for large transfers we prefetch - MAX_COPY_PREFETCH cachelines ahead. For small transfers - we prefetch 1 cacheline ahead. */ - cmpwi r0,MAX_COPY_PREFETCH - ble 112f - li r7,MAX_COPY_PREFETCH -112: mtctr r7 -111: dcbt r3,r4 - addi r3,r3,CACHELINE_BYTES - bdnz 111b -#else - dcbt r3,r4 - addi r3,r3,CACHELINE_BYTES -#endif /* MAX_COPY_PREFETCH > 1 */ - -114: subf r8,r7,r0 - mr r0,r7 - mtctr r8 - -53: dcbt r3,r4 -54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text -/* the main body of the cacheline loop */ - COPY_16_BYTES_WITHEX(0) -#if L1_CACHE_LINE_SIZE >= 32 - COPY_16_BYTES_WITHEX(1) -#if L1_CACHE_LINE_SIZE >= 64 - COPY_16_BYTES_WITHEX(2) - COPY_16_BYTES_WITHEX(3) -#if L1_CACHE_LINE_SIZE >= 128 - COPY_16_BYTES_WITHEX(4) - COPY_16_BYTES_WITHEX(5) - COPY_16_BYTES_WITHEX(6) - COPY_16_BYTES_WITHEX(7) -#endif -#endif -#endif - bdnz 53b - cmpwi r0,0 - li r3,4 - li r7,0 - bne 114b -#endif /* CONFIG_8xx */ - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) -31: stwu r0,4(r6) - bdnz 30b - -64: andi. 
r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) -41: stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: li r3,0 - blr - -/* read fault, initial single-byte copy */ -100: li r9,0 - b 90f -/* write fault, initial single-byte copy */ -101: li r9,1 -90: subf r5,r8,r5 - li r3,0 - b 99f -/* read fault, initial word copy */ -102: li r9,0 - b 91f -/* write fault, initial word copy */ -103: li r9,1 -91: li r3,2 - b 99f - -/* - * this stuff handles faults in the cacheline loop and branches to either - * 104f (if in read part) or 105f (if in write part), after updating r5 - */ - COPY_16_BYTES_EXCODE(0) -#if L1_CACHE_LINE_SIZE >= 32 - COPY_16_BYTES_EXCODE(1) -#if L1_CACHE_LINE_SIZE >= 64 - COPY_16_BYTES_EXCODE(2) - COPY_16_BYTES_EXCODE(3) -#if L1_CACHE_LINE_SIZE >= 128 - COPY_16_BYTES_EXCODE(4) - COPY_16_BYTES_EXCODE(5) - COPY_16_BYTES_EXCODE(6) - COPY_16_BYTES_EXCODE(7) -#endif -#endif -#endif - -/* read fault in cacheline loop */ -104: li r9,0 - b 92f -/* fault on dcbz (effectively a write fault) */ -/* or write fault in cacheline loop */ -105: li r9,1 -92: li r3,LG_CACHELINE_BYTES - mfctr r8 - add r0,r0,r8 - b 106f -/* read fault in final word loop */ -108: li r9,0 - b 93f -/* write fault in final word loop */ -109: li r9,1 -93: andi. r5,r5,3 - li r3,2 - b 99f -/* read fault in final byte loop */ -110: li r9,0 - b 94f -/* write fault in final byte loop */ -111: li r9,1 -94: li r5,0 - li r3,0 -/* - * At this stage the number of bytes not copied is - * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. - */ -99: mfctr r0 -106: slw r3,r0,r3 - add. r3,r3,r5 - beq 120f /* shouldn't happen */ - cmpwi 0,r9,0 - bne 120f -/* for a read fault, first try to continue the copy one byte at a time */ - mtctr r3 -130: lbz r0,4(r4) -131: stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 130b -/* then clear out the destination: r3 bytes starting at 4(r6) */ -132: mfctr r3 - srwi. r0,r3,2 - li r9,0 - mtctr r0 - beq 113f -112: stwu r9,4(r6) - bdnz 112b -113: andi. r0,r3,3 - mtctr r0 - beq 120f -114: stb r9,4(r6) - addi r6,r6,1 - bdnz 114b -120: blr - - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S new file mode 100644 index 00000000000..420a912198a --- /dev/null +++ b/arch/powerpc/lib/copy_32.S @@ -0,0 +1,543 @@ +/* + * Memory copy functions for 32-bit PowerPC. + * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
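+ *
+ * (editor's note: the cacheable_memzero and cacheable_memcpy
+ * routines below rely on dcbz and therefore require a cacheable
+ * destination, as their individual comments state)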
+ */ +#include +#include +#include +#include +#include + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,9 ## n ## 0b; \ + .long 8 ## n ## 1b,9 ## n ## 0b; \ + .long 8 ## n ## 2b,9 ## n ## 0b; \ + .long 8 ## n ## 3b,9 ## n ## 0b; \ + .long 8 ## n ## 4b,9 ## n ## 1b; \ + .long 8 ## n ## 5b,9 ## n ## 1b; \ + .long 8 ## n ## 6b,9 ## n ## 1b; \ + .long 8 ## n ## 7b,9 ## n ## 1b; \ + .text + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "copy32.S",N_SO,0,0,0f +0: + +CACHELINE_BYTES = L1_CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE +CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) + +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwi r9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1 /* total number of complete cachelines */ + ble 2f + xori r0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwu r4,4(r6) + bdnz 4b +3: mtctr r9 + li r7,4 +#if !defined(CONFIG_8xx) +10: dcbz r7,r6 +#else +10: stw r4, 4(r6) + stw r4, 8(r6) + stw r4, 12(r6) + stw r4, 16(r6) +#if CACHE_LINE_SIZE >= 32 + stw r4, 20(r6) + stw r4, 24(r6) + stw r4, 28(r6) + stw r4, 32(r6) +#endif /* CACHE_LINE_SIZE */ +#endif + addi r6,r6,CACHELINE_BYTES + bdnz 10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addi r5,r5,4 +2: srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(memset) + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. 
r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: +#if !defined(CONFIG_8xx) + dcbz r11,r6 +#endif + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(__copy_tofrom_user) + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ +71: stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ +73: stwu r9,4(r6) + bdnz 72b + + .section __ex_table,"a" + .align 2 + .long 70b,100f + .long 71b,101f + .long 72b,102f + .long 73b,103f + .text + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + +#ifdef CONFIG_8xx + /* Don't use prefetch on 8xx */ + mtctr r0 + li r0,0 +53: COPY_16_BYTES_WITHEX(0) + bdnz 53b + +#else /* not CONFIG_8xx */ + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. 
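+ (editor's worked example with hypothetical numbers: if
+ MAX_COPY_PREFETCH were 4 and the copy covered 16 cache lines,
+ r7 would be set to 4, four dcbt touches would be issued up
+ front, and the loop below would then stay 4 lines ahead of
+ the copy)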
*/ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 + .section __ex_table,"a" + .align 2 + .long 54b,105f + .text +/* the main body of the cacheline loop */ + COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_WITHEX(2) + COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_WITHEX(4) + COPY_16_BYTES_WITHEX(5) + COPY_16_BYTES_WITHEX(6) + COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b +#endif /* CONFIG_8xx */ + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) +41: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: li r3,0 + blr + +/* read fault, initial single-byte copy */ +100: li r9,0 + b 90f +/* write fault, initial single-byte copy */ +101: li r9,1 +90: subf r5,r8,r5 + li r3,0 + b 99f +/* read fault, initial word copy */ +102: li r9,0 + b 91f +/* write fault, initial word copy */ +103: li r9,1 +91: li r3,2 + b 99f + +/* + * this stuff handles faults in the cacheline loop and branches to either + * 104f (if in read part) or 105f (if in write part), after updating r5 + */ + COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_EXCODE(2) + COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_EXCODE(4) + COPY_16_BYTES_EXCODE(5) + COPY_16_BYTES_EXCODE(6) + COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + +/* read fault in cacheline loop */ +104: li r9,0 + b 92f +/* fault on dcbz (effectively a write fault) */ +/* or write fault in cacheline loop */ +105: li r9,1 +92: li r3,LG_CACHELINE_BYTES + mfctr r8 + add r0,r0,r8 + b 106f +/* read fault in final word loop */ +108: li r9,0 + b 93f +/* write fault in final word loop */ +109: li r9,1 +93: andi. r5,r5,3 + li r3,2 + b 99f +/* read fault in final byte loop */ +110: li r9,0 + b 94f +/* write fault in final byte loop */ +111: li r9,1 +94: li r5,0 + li r3,0 +/* + * At this stage the number of bytes not copied is + * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. + */ +99: mfctr r0 +106: slw r3,r0,r3 + add. r3,r3,r5 + beq 120f /* shouldn't happen */ + cmpwi 0,r9,0 + bne 120f +/* for a read fault, first try to continue the copy one byte at a time */ + mtctr r3 +130: lbz r0,4(r4) +131: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 130b +/* then clear out the destination: r3 bytes starting at 4(r6) */ +132: mfctr r3 + srwi. r0,r3,2 + li r9,0 + mtctr r0 + beq 113f +112: stwu r9,4(r6) + bdnz 112b +113: andi. r0,r3,3 + mtctr r0 + beq 120f +114: stb r9,4(r6) + addi r6,r6,1 + bdnz 114b +120: blr + + .section __ex_table,"a" + .align 2 + .long 30b,108b + .long 31b,109b + .long 40b,110b + .long 41b,111b + .long 130b,132b + .long 131b,120b + .long 112b,120b + .long 114b,120b + .text diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S deleted file mode 100644 index 733d61618bb..00000000000 --- a/arch/powerpc/lib/copypage.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * arch/ppc64/lib/copypage.S - * - * Copyright (C) 2002 Paul Mackerras, IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include - -_GLOBAL(copy_page) - std r31,-8(1) - std r30,-16(1) - std r29,-24(1) - std r28,-32(1) - std r27,-40(1) - std r26,-48(1) - std r25,-56(1) - std r24,-64(1) - std r23,-72(1) - std r22,-80(1) - std r21,-88(1) - std r20,-96(1) - li r5,4096/32 - 1 - addi r3,r3,-8 - li r12,5 -0: addi r5,r5,-24 - mtctr r12 - ld r22,640(4) - ld r21,512(4) - ld r20,384(4) - ld r11,256(4) - ld r9,128(4) - ld r7,0(4) - ld r25,648(4) - ld r24,520(4) - ld r23,392(4) - ld r10,264(4) - ld r8,136(4) - ldu r6,8(4) - cmpwi r5,24 -1: std r22,648(3) - std r21,520(3) - std r20,392(3) - std r11,264(3) - std r9,136(3) - std r7,8(3) - ld r28,648(4) - ld r27,520(4) - ld r26,392(4) - ld r31,264(4) - ld r30,136(4) - ld r29,8(4) - std r25,656(3) - std r24,528(3) - std r23,400(3) - std r10,272(3) - std r8,144(3) - std r6,16(3) - ld r22,656(4) - ld r21,528(4) - ld r20,400(4) - ld r11,272(4) - ld r9,144(4) - ld r7,16(4) - std r28,664(3) - std r27,536(3) - std r26,408(3) - std r31,280(3) - std r30,152(3) - stdu r29,24(3) - ld r25,664(4) - ld r24,536(4) - ld r23,408(4) - ld r10,280(4) - ld r8,152(4) - ldu r6,24(4) - bdnz 1b - std r22,648(3) - std r21,520(3) - std r20,392(3) - std r11,264(3) - std r9,136(3) - std r7,8(3) - addi r4,r4,640 - addi r3,r3,648 - bge 0b - mtctr r5 - ld r7,0(4) - ld r8,8(4) - ldu r9,16(4) -3: ld r10,8(4) - std r7,8(3) - ld r7,16(4) - std r8,16(3) - ld r8,24(4) - std r9,24(3) - ldu r9,32(4) - stdu r10,32(3) - bdnz 3b -4: ld r10,8(4) - std r7,8(3) - std r8,16(3) - std r9,24(3) - std r10,32(3) -9: ld r20,-96(1) - ld r21,-88(1) - ld r22,-80(1) - ld r23,-72(1) - ld r24,-64(1) - ld r25,-56(1) - ld r26,-48(1) - ld r27,-40(1) - ld r28,-32(1) - ld r29,-24(1) - ld r30,-16(1) - ld r31,-8(1) - blr diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S new file mode 100644 index 00000000000..733d61618bb --- /dev/null +++ b/arch/powerpc/lib/copypage_64.S @@ -0,0 +1,121 @@ +/* + * arch/ppc64/lib/copypage.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
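+ *
+ * (editor's note, descriptive only: copy_page below copies a single
+ * 4096-byte page; r5 is loaded with 4096/32 - 1, so the pipelined
+ * loop moves 32 bytes of doublewords per step, interleaving several
+ * load/store streams to hide load latency)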
+ */ +#include +#include + +_GLOBAL(copy_page) + std r31,-8(1) + std r30,-16(1) + std r29,-24(1) + std r28,-32(1) + std r27,-40(1) + std r26,-48(1) + std r25,-56(1) + std r24,-64(1) + std r23,-72(1) + std r22,-80(1) + std r21,-88(1) + std r20,-96(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r12,5 +0: addi r5,r5,-24 + mtctr r12 + ld r22,640(4) + ld r21,512(4) + ld r20,384(4) + ld r11,256(4) + ld r9,128(4) + ld r7,0(4) + ld r25,648(4) + ld r24,520(4) + ld r23,392(4) + ld r10,264(4) + ld r8,136(4) + ldu r6,8(4) + cmpwi r5,24 +1: std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + ld r28,648(4) + ld r27,520(4) + ld r26,392(4) + ld r31,264(4) + ld r30,136(4) + ld r29,8(4) + std r25,656(3) + std r24,528(3) + std r23,400(3) + std r10,272(3) + std r8,144(3) + std r6,16(3) + ld r22,656(4) + ld r21,528(4) + ld r20,400(4) + ld r11,272(4) + ld r9,144(4) + ld r7,16(4) + std r28,664(3) + std r27,536(3) + std r26,408(3) + std r31,280(3) + std r30,152(3) + stdu r29,24(3) + ld r25,664(4) + ld r24,536(4) + ld r23,408(4) + ld r10,280(4) + ld r8,152(4) + ldu r6,24(4) + bdnz 1b + std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 + bge 0b + mtctr r5 + ld r7,0(4) + ld r8,8(4) + ldu r9,16(4) +3: ld r10,8(4) + std r7,8(3) + ld r7,16(4) + std r8,16(3) + ld r8,24(4) + std r9,24(3) + ldu r9,32(4) + stdu r10,32(3) + bdnz 3b +4: ld r10,8(4) + std r7,8(3) + std r8,16(3) + std r9,24(3) + std r10,32(3) +9: ld r20,-96(1) + ld r21,-88(1) + ld r22,-80(1) + ld r23,-72(1) + ld r24,-64(1) + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S deleted file mode 100644 index a0b3fbbd6fb..00000000000 --- a/arch/powerpc/lib/copyuser.S +++ /dev/null @@ -1,576 +0,0 @@ -/* - * arch/ppc64/lib/copyuser.S - * - * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include - - .align 7 -_GLOBAL(__copy_tofrom_user) - /* first check for a whole page copy on a page boundary */ - cmpldi cr1,r5,16 - cmpdi cr6,r5,4096 - or r0,r3,r4 - neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ - andi. r0,r0,4095 - std r3,-24(r1) - crand cr0*4+2,cr0*4+2,cr6*4+2 - std r4,-16(r1) - std r5,-8(r1) - dcbt 0,r4 - beq .Lcopy_page - andi. r6,r6,7 - mtcrf 0x01,r5 - blt cr1,.Lshort_copy - bne .Ldst_unaligned -.Ldst_aligned: - andi. r0,r4,7 - addi r3,r3,-16 - bne .Lsrc_unaligned - srdi r7,r5,4 -20: ld r9,0(r4) - addi r4,r4,-8 - mtctr r7 - andi. r5,r5,7 - bf cr7*4+0,22f - addi r3,r3,8 - addi r4,r4,8 - mr r8,r9 - blt cr1,72f -21: ld r9,8(r4) -70: std r8,8(r3) -22: ldu r8,16(r4) -71: stdu r9,16(r3) - bdnz 21b -72: std r8,8(r3) - beq+ 3f - addi r3,r3,16 -23: ld r9,8(r4) -.Ldo_tail: - bf cr7*4+1,1f - rotldi r9,r9,32 -73: stw r9,0(r3) - addi r3,r3,4 -1: bf cr7*4+2,2f - rotldi r9,r9,16 -74: sth r9,0(r3) - addi r3,r3,2 -2: bf cr7*4+3,3f - rotldi r9,r9,8 -75: stb r9,0(r3) -3: li r3,0 - blr - -.Lsrc_unaligned: - srdi r6,r5,3 - addi r5,r5,-16 - subf r4,r0,r4 - srdi r7,r5,4 - sldi r10,r0,3 - cmpldi cr6,r6,3 - andi. 
r5,r5,7 - mtctr r7 - subfic r11,r10,64 - add r5,r5,r0 - bt cr7*4+0,28f - -24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ -25: ld r0,8(r4) - sld r6,r9,r10 -26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 - or r7,r7,r6 - blt cr6,79f -27: ld r0,8(r4) - b 2f - -28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ -29: ldu r9,8(r4) - sld r8,r0,r10 - addi r3,r3,-8 - blt cr6,5f -30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 -31: ldu r9,16(r4) - or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 - addi r3,r3,16 - beq cr6,78f - -1: or r7,r7,r6 -32: ld r0,8(r4) -76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 -33: ldu r9,16(r4) - or r12,r8,r12 -77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 - bdnz 1b - -78: std r12,8(r3) - or r7,r7,r6 -79: std r7,16(r3) -5: srd r12,r9,r11 - or r12,r8,r12 -80: std r12,24(r3) - bne 6f - li r3,0 - blr -6: cmpwi cr1,r5,8 - addi r3,r3,32 - sld r9,r9,r10 - ble cr1,.Ldo_tail -34: ld r0,8(r4) - srd r7,r0,r11 - or r9,r7,r9 - b .Ldo_tail - -.Ldst_unaligned: - mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ - subf r5,r6,r5 - li r7,0 - cmpldi r1,r5,16 - bf cr7*4+3,1f -35: lbz r0,0(r4) -81: stb r0,0(r3) - addi r7,r7,1 -1: bf cr7*4+2,2f -36: lhzx r0,r7,r4 -82: sthx r0,r7,r3 - addi r7,r7,2 -2: bf cr7*4+1,3f -37: lwzx r0,r7,r4 -83: stwx r0,r7,r3 -3: mtcrf 0x01,r5 - add r4,r6,r4 - add r3,r6,r3 - b .Ldst_aligned - -.Lshort_copy: - bf cr7*4+0,1f -38: lwz r0,0(r4) -39: lwz r9,4(r4) - addi r4,r4,8 -84: stw r0,0(r3) -85: stw r9,4(r3) - addi r3,r3,8 -1: bf cr7*4+1,2f -40: lwz r0,0(r4) - addi r4,r4,4 -86: stw r0,0(r3) - addi r3,r3,4 -2: bf cr7*4+2,3f -41: lhz r0,0(r4) - addi r4,r4,2 -87: sth r0,0(r3) - addi r3,r3,2 -3: bf cr7*4+3,4f -42: lbz r0,0(r4) -88: stb r0,0(r3) -4: li r3,0 - blr - -/* - * exception handlers follow - * we have to return the number of bytes not copied - * for an exception on a load, we set the rest of the destination to 0 - */ - -136: -137: - add r3,r3,r7 - b 1f -130: -131: - addi r3,r3,8 -120: -122: -124: -125: -126: -127: -128: -129: -133: - addi r3,r3,8 -121: -132: - addi r3,r3,8 -123: -134: -135: -138: -139: -140: -141: -142: - -/* - * here we have had a fault on a load and r3 points to the first - * unmodified byte of the destination - */ -1: ld r6,-24(r1) - ld r4,-16(r1) - ld r5,-8(r1) - subf r6,r6,r3 - add r4,r4,r6 - subf r5,r6,r5 /* #bytes left to go */ - -/* - * first see if we can copy any more bytes before hitting another exception - */ - mtctr r5 -43: lbz r0,0(r4) - addi r4,r4,1 -89: stb r0,0(r3) - addi r3,r3,1 - bdnz 43b - li r3,0 /* huh? all copied successfully this time? */ - blr - -/* - * here we have trapped again, need to clear ctr bytes starting at r3 - */ -143: mfctr r5 - li r0,0 - mr r4,r3 - mr r3,r5 /* return the number of bytes not copied */ -1: andi. r9,r4,7 - beq 3f -90: stb r0,0(r4) - addic. r5,r5,-1 - addi r4,r4,1 - bne 1b - blr -3: cmpldi cr1,r5,8 - srdi r9,r5,3 - andi. 
r5,r5,7 - blt cr1,93f - mtctr r9 -91: std r0,0(r4) - addi r4,r4,8 - bdnz 91b -93: beqlr - mtctr r5 -92: stb r0,0(r4) - addi r4,r4,1 - bdnz 92b - blr - -/* - * exception handlers for stores: we just need to work - * out how many bytes weren't copied - */ -182: -183: - add r3,r3,r7 - b 1f -180: - addi r3,r3,8 -171: -177: - addi r3,r3,8 -170: -172: -176: -178: - addi r3,r3,4 -185: - addi r3,r3,4 -173: -174: -175: -179: -181: -184: -186: -187: -188: -189: -1: - ld r6,-24(r1) - ld r5,-8(r1) - add r6,r6,r5 - subf r3,r3,r6 /* #bytes not copied */ -190: -191: -192: - blr /* #bytes not copied in r3 */ - - .section __ex_table,"a" - .align 3 - .llong 20b,120b - .llong 21b,121b - .llong 70b,170b - .llong 22b,122b - .llong 71b,171b - .llong 72b,172b - .llong 23b,123b - .llong 73b,173b - .llong 74b,174b - .llong 75b,175b - .llong 24b,124b - .llong 25b,125b - .llong 26b,126b - .llong 27b,127b - .llong 28b,128b - .llong 29b,129b - .llong 30b,130b - .llong 31b,131b - .llong 32b,132b - .llong 76b,176b - .llong 33b,133b - .llong 77b,177b - .llong 78b,178b - .llong 79b,179b - .llong 80b,180b - .llong 34b,134b - .llong 35b,135b - .llong 81b,181b - .llong 36b,136b - .llong 82b,182b - .llong 37b,137b - .llong 83b,183b - .llong 38b,138b - .llong 39b,139b - .llong 84b,184b - .llong 85b,185b - .llong 40b,140b - .llong 86b,186b - .llong 41b,141b - .llong 87b,187b - .llong 42b,142b - .llong 88b,188b - .llong 43b,143b - .llong 89b,189b - .llong 90b,190b - .llong 91b,191b - .llong 92b,192b - - .text - -/* - * Routine to copy a whole page of data, optimized for POWER4. - * On POWER4 it is more than 50% faster than the simple loop - * above (following the .Ldst_aligned label) but it runs slightly - * slower on POWER3. - */ -.Lcopy_page: - std r31,-32(1) - std r30,-40(1) - std r29,-48(1) - std r28,-56(1) - std r27,-64(1) - std r26,-72(1) - std r25,-80(1) - std r24,-88(1) - std r23,-96(1) - std r22,-104(1) - std r21,-112(1) - std r20,-120(1) - li r5,4096/32 - 1 - addi r3,r3,-8 - li r0,5 -0: addi r5,r5,-24 - mtctr r0 -20: ld r22,640(4) -21: ld r21,512(4) -22: ld r20,384(4) -23: ld r11,256(4) -24: ld r9,128(4) -25: ld r7,0(4) -26: ld r25,648(4) -27: ld r24,520(4) -28: ld r23,392(4) -29: ld r10,264(4) -30: ld r8,136(4) -31: ldu r6,8(4) - cmpwi r5,24 -1: -32: std r22,648(3) -33: std r21,520(3) -34: std r20,392(3) -35: std r11,264(3) -36: std r9,136(3) -37: std r7,8(3) -38: ld r28,648(4) -39: ld r27,520(4) -40: ld r26,392(4) -41: ld r31,264(4) -42: ld r30,136(4) -43: ld r29,8(4) -44: std r25,656(3) -45: std r24,528(3) -46: std r23,400(3) -47: std r10,272(3) -48: std r8,144(3) -49: std r6,16(3) -50: ld r22,656(4) -51: ld r21,528(4) -52: ld r20,400(4) -53: ld r11,272(4) -54: ld r9,144(4) -55: ld r7,16(4) -56: std r28,664(3) -57: std r27,536(3) -58: std r26,408(3) -59: std r31,280(3) -60: std r30,152(3) -61: stdu r29,24(3) -62: ld r25,664(4) -63: ld r24,536(4) -64: ld r23,408(4) -65: ld r10,280(4) -66: ld r8,152(4) -67: ldu r6,24(4) - bdnz 1b -68: std r22,648(3) -69: std r21,520(3) -70: std r20,392(3) -71: std r11,264(3) -72: std r9,136(3) -73: std r7,8(3) -74: addi r4,r4,640 -75: addi r3,r3,648 - bge 0b - mtctr r5 -76: ld r7,0(4) -77: ld r8,8(4) -78: ldu r9,16(4) -3: -79: ld r10,8(4) -80: std r7,8(3) -81: ld r7,16(4) -82: std r8,16(3) -83: ld r8,24(4) -84: std r9,24(3) -85: ldu r9,32(4) -86: stdu r10,32(3) - bdnz 3b -4: -87: ld r10,8(4) -88: std r7,8(3) -89: std r8,16(3) -90: std r9,24(3) -91: std r10,32(3) -9: ld r20,-120(1) - ld r21,-112(1) - ld r22,-104(1) - ld r23,-96(1) - ld r24,-88(1) - ld r25,-80(1) - ld r26,-72(1) - ld 
r27,-64(1) - ld r28,-56(1) - ld r29,-48(1) - ld r30,-40(1) - ld r31,-32(1) - li r3,0 - blr - -/* - * on an exception, reset to the beginning and jump back into the - * standard __copy_tofrom_user - */ -100: ld r20,-120(1) - ld r21,-112(1) - ld r22,-104(1) - ld r23,-96(1) - ld r24,-88(1) - ld r25,-80(1) - ld r26,-72(1) - ld r27,-64(1) - ld r28,-56(1) - ld r29,-48(1) - ld r30,-40(1) - ld r31,-32(1) - ld r3,-24(r1) - ld r4,-16(r1) - li r5,4096 - b .Ldst_aligned - - .section __ex_table,"a" - .align 3 - .llong 20b,100b - .llong 21b,100b - .llong 22b,100b - .llong 23b,100b - .llong 24b,100b - .llong 25b,100b - .llong 26b,100b - .llong 27b,100b - .llong 28b,100b - .llong 29b,100b - .llong 30b,100b - .llong 31b,100b - .llong 32b,100b - .llong 33b,100b - .llong 34b,100b - .llong 35b,100b - .llong 36b,100b - .llong 37b,100b - .llong 38b,100b - .llong 39b,100b - .llong 40b,100b - .llong 41b,100b - .llong 42b,100b - .llong 43b,100b - .llong 44b,100b - .llong 45b,100b - .llong 46b,100b - .llong 47b,100b - .llong 48b,100b - .llong 49b,100b - .llong 50b,100b - .llong 51b,100b - .llong 52b,100b - .llong 53b,100b - .llong 54b,100b - .llong 55b,100b - .llong 56b,100b - .llong 57b,100b - .llong 58b,100b - .llong 59b,100b - .llong 60b,100b - .llong 61b,100b - .llong 62b,100b - .llong 63b,100b - .llong 64b,100b - .llong 65b,100b - .llong 66b,100b - .llong 67b,100b - .llong 68b,100b - .llong 69b,100b - .llong 70b,100b - .llong 71b,100b - .llong 72b,100b - .llong 73b,100b - .llong 74b,100b - .llong 75b,100b - .llong 76b,100b - .llong 77b,100b - .llong 78b,100b - .llong 79b,100b - .llong 80b,100b - .llong 81b,100b - .llong 82b,100b - .llong 83b,100b - .llong 84b,100b - .llong 85b,100b - .llong 86b,100b - .llong 87b,100b - .llong 88b,100b - .llong 89b,100b - .llong 90b,100b - .llong 91b,100b diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S new file mode 100644 index 00000000000..a0b3fbbd6fb --- /dev/null +++ b/arch/powerpc/lib/copyuser_64.S @@ -0,0 +1,576 @@ +/* + * arch/ppc64/lib/copyuser.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(__copy_tofrom_user) + /* first check for a whole page copy on a page boundary */ + cmpldi cr1,r5,16 + cmpdi cr6,r5,4096 + or r0,r3,r4 + neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + andi. r0,r0,4095 + std r3,-24(r1) + crand cr0*4+2,cr0*4+2,cr6*4+2 + std r4,-16(r1) + std r5,-8(r1) + dcbt 0,r4 + beq .Lcopy_page + andi. r6,r6,7 + mtcrf 0x01,r5 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 +20: ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,22f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,72f +21: ld r9,8(r4) +70: std r8,8(r3) +22: ldu r8,16(r4) +71: stdu r9,16(r3) + bdnz 21b +72: std r8,8(r3) + beq+ 3f + addi r3,r3,16 +23: ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 +73: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +74: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +75: stb r9,0(r3) +3: li r3,0 + blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpldi cr6,r6,3 + andi. 
r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + bt cr7*4+0,28f + +24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +25: ld r0,8(r4) + sld r6,r9,r10 +26: ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,79f +27: ld r0,8(r4) + b 2f + +28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +29: ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f +30: ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 +31: ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,78f + +1: or r7,r7,r6 +32: ld r0,8(r4) +76: std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 +33: ldu r9,16(r4) + or r12,r8,r12 +77: stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +78: std r12,8(r3) + or r7,r7,r6 +79: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 +80: std r12,24(r3) + bne 6f + li r3,0 + blr +6: cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail +34: ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f +35: lbz r0,0(r4) +81: stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f +36: lhzx r0,r7,r4 +82: sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f +37: lwzx r0,r7,r4 +83: stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f +38: lwz r0,0(r4) +39: lwz r9,4(r4) + addi r4,r4,8 +84: stw r0,0(r3) +85: stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f +40: lwz r0,0(r4) + addi r4,r4,4 +86: stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f +41: lhz r0,0(r4) + addi r4,r4,2 +87: sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f +42: lbz r0,0(r4) +88: stb r0,0(r3) +4: li r3,0 + blr + +/* + * exception handlers follow + * we have to return the number of bytes not copied + * for an exception on a load, we set the rest of the destination to 0 + */ + +136: +137: + add r3,r3,r7 + b 1f +130: +131: + addi r3,r3,8 +120: +122: +124: +125: +126: +127: +128: +129: +133: + addi r3,r3,8 +121: +132: + addi r3,r3,8 +123: +134: +135: +138: +139: +140: +141: +142: + +/* + * here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination + */ +1: ld r6,-24(r1) + ld r4,-16(r1) + ld r5,-8(r1) + subf r6,r6,r3 + add r4,r4,r6 + subf r5,r6,r5 /* #bytes left to go */ + +/* + * first see if we can copy any more bytes before hitting another exception + */ + mtctr r5 +43: lbz r0,0(r4) + addi r4,r4,1 +89: stb r0,0(r3) + addi r3,r3,1 + bdnz 43b + li r3,0 /* huh? all copied successfully this time? */ + blr + +/* + * here we have trapped again, need to clear ctr bytes starting at r3 + */ +143: mfctr r5 + li r0,0 + mr r4,r3 + mr r3,r5 /* return the number of bytes not copied */ +1: andi. r9,r4,7 + beq 3f +90: stb r0,0(r4) + addic. r5,r5,-1 + addi r4,r4,1 + bne 1b + blr +3: cmpldi cr1,r5,8 + srdi r9,r5,3 + andi. 
r5,r5,7 + blt cr1,93f + mtctr r9 +91: std r0,0(r4) + addi r4,r4,8 + bdnz 91b +93: beqlr + mtctr r5 +92: stb r0,0(r4) + addi r4,r4,1 + bdnz 92b + blr + +/* + * exception handlers for stores: we just need to work + * out how many bytes weren't copied + */ +182: +183: + add r3,r3,r7 + b 1f +180: + addi r3,r3,8 +171: +177: + addi r3,r3,8 +170: +172: +176: +178: + addi r3,r3,4 +185: + addi r3,r3,4 +173: +174: +175: +179: +181: +184: +186: +187: +188: +189: +1: + ld r6,-24(r1) + ld r5,-8(r1) + add r6,r6,r5 + subf r3,r3,r6 /* #bytes not copied */ +190: +191: +192: + blr /* #bytes not copied in r3 */ + + .section __ex_table,"a" + .align 3 + .llong 20b,120b + .llong 21b,121b + .llong 70b,170b + .llong 22b,122b + .llong 71b,171b + .llong 72b,172b + .llong 23b,123b + .llong 73b,173b + .llong 74b,174b + .llong 75b,175b + .llong 24b,124b + .llong 25b,125b + .llong 26b,126b + .llong 27b,127b + .llong 28b,128b + .llong 29b,129b + .llong 30b,130b + .llong 31b,131b + .llong 32b,132b + .llong 76b,176b + .llong 33b,133b + .llong 77b,177b + .llong 78b,178b + .llong 79b,179b + .llong 80b,180b + .llong 34b,134b + .llong 35b,135b + .llong 81b,181b + .llong 36b,136b + .llong 82b,182b + .llong 37b,137b + .llong 83b,183b + .llong 38b,138b + .llong 39b,139b + .llong 84b,184b + .llong 85b,185b + .llong 40b,140b + .llong 86b,186b + .llong 41b,141b + .llong 87b,187b + .llong 42b,142b + .llong 88b,188b + .llong 43b,143b + .llong 89b,189b + .llong 90b,190b + .llong 91b,191b + .llong 92b,192b + + .text + +/* + * Routine to copy a whole page of data, optimized for POWER4. + * On POWER4 it is more than 50% faster than the simple loop + * above (following the .Ldst_aligned label) but it runs slightly + * slower on POWER3. + */ +.Lcopy_page: + std r31,-32(1) + std r30,-40(1) + std r29,-48(1) + std r28,-56(1) + std r27,-64(1) + std r26,-72(1) + std r25,-80(1) + std r24,-88(1) + std r23,-96(1) + std r22,-104(1) + std r21,-112(1) + std r20,-120(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r0,5 +0: addi r5,r5,-24 + mtctr r0 +20: ld r22,640(4) +21: ld r21,512(4) +22: ld r20,384(4) +23: ld r11,256(4) +24: ld r9,128(4) +25: ld r7,0(4) +26: ld r25,648(4) +27: ld r24,520(4) +28: ld r23,392(4) +29: ld r10,264(4) +30: ld r8,136(4) +31: ldu r6,8(4) + cmpwi r5,24 +1: +32: std r22,648(3) +33: std r21,520(3) +34: std r20,392(3) +35: std r11,264(3) +36: std r9,136(3) +37: std r7,8(3) +38: ld r28,648(4) +39: ld r27,520(4) +40: ld r26,392(4) +41: ld r31,264(4) +42: ld r30,136(4) +43: ld r29,8(4) +44: std r25,656(3) +45: std r24,528(3) +46: std r23,400(3) +47: std r10,272(3) +48: std r8,144(3) +49: std r6,16(3) +50: ld r22,656(4) +51: ld r21,528(4) +52: ld r20,400(4) +53: ld r11,272(4) +54: ld r9,144(4) +55: ld r7,16(4) +56: std r28,664(3) +57: std r27,536(3) +58: std r26,408(3) +59: std r31,280(3) +60: std r30,152(3) +61: stdu r29,24(3) +62: ld r25,664(4) +63: ld r24,536(4) +64: ld r23,408(4) +65: ld r10,280(4) +66: ld r8,152(4) +67: ldu r6,24(4) + bdnz 1b +68: std r22,648(3) +69: std r21,520(3) +70: std r20,392(3) +71: std r11,264(3) +72: std r9,136(3) +73: std r7,8(3) +74: addi r4,r4,640 +75: addi r3,r3,648 + bge 0b + mtctr r5 +76: ld r7,0(4) +77: ld r8,8(4) +78: ldu r9,16(4) +3: +79: ld r10,8(4) +80: std r7,8(3) +81: ld r7,16(4) +82: std r8,16(3) +83: ld r8,24(4) +84: std r9,24(3) +85: ldu r9,32(4) +86: stdu r10,32(3) + bdnz 3b +4: +87: ld r10,8(4) +88: std r7,8(3) +89: std r8,16(3) +90: std r9,24(3) +91: std r10,32(3) +9: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld 
r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + li r3,0 + blr + +/* + * on an exception, reset to the beginning and jump back into the + * standard __copy_tofrom_user + */ +100: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + ld r3,-24(r1) + ld r4,-16(r1) + li r5,4096 + b .Ldst_aligned + + .section __ex_table,"a" + .align 3 + .llong 20b,100b + .llong 21b,100b + .llong 22b,100b + .llong 23b,100b + .llong 24b,100b + .llong 25b,100b + .llong 26b,100b + .llong 27b,100b + .llong 28b,100b + .llong 29b,100b + .llong 30b,100b + .llong 31b,100b + .llong 32b,100b + .llong 33b,100b + .llong 34b,100b + .llong 35b,100b + .llong 36b,100b + .llong 37b,100b + .llong 38b,100b + .llong 39b,100b + .llong 40b,100b + .llong 41b,100b + .llong 42b,100b + .llong 43b,100b + .llong 44b,100b + .llong 45b,100b + .llong 46b,100b + .llong 47b,100b + .llong 48b,100b + .llong 49b,100b + .llong 50b,100b + .llong 51b,100b + .llong 52b,100b + .llong 53b,100b + .llong 54b,100b + .llong 55b,100b + .llong 56b,100b + .llong 57b,100b + .llong 58b,100b + .llong 59b,100b + .llong 60b,100b + .llong 61b,100b + .llong 62b,100b + .llong 63b,100b + .llong 64b,100b + .llong 65b,100b + .llong 66b,100b + .llong 67b,100b + .llong 68b,100b + .llong 69b,100b + .llong 70b,100b + .llong 71b,100b + .llong 72b,100b + .llong 73b,100b + .llong 74b,100b + .llong 75b,100b + .llong 76b,100b + .llong 77b,100b + .llong 78b,100b + .llong 79b,100b + .llong 80b,100b + .llong 81b,100b + .llong 82b,100b + .llong 83b,100b + .llong 84b,100b + .llong 85b,100b + .llong 86b,100b + .llong 87b,100b + .llong 88b,100b + .llong 89b,100b + .llong 90b,100b + .llong 91b,100b diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S new file mode 100644 index 00000000000..68df20283ff --- /dev/null +++ b/arch/powerpc/lib/mem_64.S @@ -0,0 +1,119 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +_GLOBAL(memset) + neg r0,r3 + rlwimi r4,r4,8,16,23 + andi. r0,r0,7 /* # bytes to be 8-byte aligned */ + rlwimi r4,r4,16,0,15 + cmplw cr1,r5,r0 /* do we get that far? */ + rldimi r4,r4,32,0 + mtcrf 1,r0 + mr r6,r3 + blt cr1,8f + beq+ 3f /* if already 8-byte aligned */ + subf r5,r0,r5 + bf 31,1f + stb r4,0(r6) + addi r6,r6,1 +1: bf 30,2f + sth r4,0(r6) + addi r6,r6,2 +2: bf 29,3f + stw r4,0(r6) + addi r6,r6,4 +3: srdi. r0,r5,6 + clrldi r5,r5,58 + mtctr r0 + beq 5f +4: std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + std r4,32(r6) + std r4,40(r6) + std r4,48(r6) + std r4,56(r6) + addi r6,r6,64 + bdnz 4b +5: srwi. r0,r5,3 + clrlwi r5,r5,29 + mtcrf 1,r0 + beq 8f + bf 29,6f + std r4,0(r6) + std r4,8(r6) + std r4,16(r6) + std r4,24(r6) + addi r6,r6,32 +6: bf 30,7f + std r4,0(r6) + std r4,8(r6) + addi r6,r6,16 +7: bf 31,8f + std r4,0(r6) + addi r6,r6,8 +8: cmpwi r5,0 + mtcrf 1,r5 + beqlr+ + bf 29,9f + stw r4,0(r6) + addi r6,r6,4 +9: bf 30,10f + sth r4,0(r6) + addi r6,r6,2 +10: bflr 31 + stb r4,0(r6) + blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt .backwards_memcpy + b .memcpy + +_GLOBAL(backwards_memcpy) + rlwinm. 
r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S deleted file mode 100644 index 9ccacdf5bcb..00000000000 --- a/arch/powerpc/lib/memcpy.S +++ /dev/null @@ -1,172 +0,0 @@ -/* - * arch/ppc64/lib/memcpy.S - * - * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include - - .align 7 -_GLOBAL(memcpy) - mtcrf 0x01,r5 - cmpldi cr1,r5,16 - neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry - andi. r6,r6,7 - dcbt 0,r4 - blt cr1,.Lshort_copy - bne .Ldst_unaligned -.Ldst_aligned: - andi. r0,r4,7 - addi r3,r3,-16 - bne .Lsrc_unaligned - srdi r7,r5,4 - ld r9,0(r4) - addi r4,r4,-8 - mtctr r7 - andi. r5,r5,7 - bf cr7*4+0,2f - addi r3,r3,8 - addi r4,r4,8 - mr r8,r9 - blt cr1,3f -1: ld r9,8(r4) - std r8,8(r3) -2: ldu r8,16(r4) - stdu r9,16(r3) - bdnz 1b -3: std r8,8(r3) - beqlr - addi r3,r3,16 - ld r9,8(r4) -.Ldo_tail: - bf cr7*4+1,1f - rotldi r9,r9,32 - stw r9,0(r3) - addi r3,r3,4 -1: bf cr7*4+2,2f - rotldi r9,r9,16 - sth r9,0(r3) - addi r3,r3,2 -2: bf cr7*4+3,3f - rotldi r9,r9,8 - stb r9,0(r3) -3: blr - -.Lsrc_unaligned: - srdi r6,r5,3 - addi r5,r5,-16 - subf r4,r0,r4 - srdi r7,r5,4 - sldi r10,r0,3 - cmpdi cr6,r6,3 - andi. 
r5,r5,7 - mtctr r7 - subfic r11,r10,64 - add r5,r5,r0 - - bt cr7*4+0,0f - - ld r9,0(r4) # 3+2n loads, 2+2n stores - ld r0,8(r4) - sld r6,r9,r10 - ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 - or r7,r7,r6 - blt cr6,4f - ld r0,8(r4) - # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 - b 2f - -0: ld r0,0(r4) # 4+2n loads, 3+2n stores - ldu r9,8(r4) - sld r8,r0,r10 - addi r3,r3,-8 - blt cr6,5f - ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 - ldu r9,16(r4) - or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 - addi r3,r3,16 - beq cr6,3f - - # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 -1: or r7,r7,r6 - ld r0,8(r4) - std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 - ldu r9,16(r4) - or r12,r8,r12 - stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 - bdnz 1b - -3: std r12,8(r3) - or r7,r7,r6 -4: std r7,16(r3) -5: srd r12,r9,r11 - or r12,r8,r12 - std r12,24(r3) - beqlr - cmpwi cr1,r5,8 - addi r3,r3,32 - sld r9,r9,r10 - ble cr1,.Ldo_tail - ld r0,8(r4) - srd r7,r0,r11 - or r9,r7,r9 - b .Ldo_tail - -.Ldst_unaligned: - mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 - subf r5,r6,r5 - li r7,0 - cmpldi r1,r5,16 - bf cr7*4+3,1f - lbz r0,0(r4) - stb r0,0(r3) - addi r7,r7,1 -1: bf cr7*4+2,2f - lhzx r0,r7,r4 - sthx r0,r7,r3 - addi r7,r7,2 -2: bf cr7*4+1,3f - lwzx r0,r7,r4 - stwx r0,r7,r3 -3: mtcrf 0x01,r5 - add r4,r6,r4 - add r3,r6,r3 - b .Ldst_aligned - -.Lshort_copy: - bf cr7*4+0,1f - lwz r0,0(r4) - lwz r9,4(r4) - addi r4,r4,8 - stw r0,0(r3) - stw r9,4(r3) - addi r3,r3,8 -1: bf cr7*4+1,2f - lwz r0,0(r4) - addi r4,r4,4 - stw r0,0(r3) - addi r3,r3,4 -2: bf cr7*4+2,3f - lhz r0,0(r4) - addi r4,r4,2 - sth r0,0(r3) - addi r3,r3,2 -3: bf cr7*4+3,4f - lbz r0,0(r4) - stb r0,0(r3) -4: blr diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S new file mode 100644 index 00000000000..9ccacdf5bcb --- /dev/null +++ b/arch/powerpc/lib/memcpy_64.S @@ -0,0 +1,172 @@ +/* + * arch/ppc64/lib/memcpy.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include + + .align 7 +_GLOBAL(memcpy) + mtcrf 0x01,r5 + cmpldi cr1,r5,16 + neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry + andi. r6,r6,7 + dcbt 0,r4 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 + ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,2f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,3f +1: ld r9,8(r4) + std r8,8(r3) +2: ldu r8,16(r4) + stdu r9,16(r3) + bdnz 1b +3: std r8,8(r3) + beqlr + addi r3,r3,16 + ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpdi cr6,r6,3 + andi. 
r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + + bt cr7*4+0,0f + + ld r9,0(r4) # 3+2n loads, 2+2n stores + ld r0,8(r4) + sld r6,r9,r10 + ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,4f + ld r0,8(r4) + # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 + b 2f + +0: ld r0,0(r4) # 4+2n loads, 3+2n stores + ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f + ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,3f + + # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 +1: or r7,r7,r6 + ld r0,8(r4) + std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +3: std r12,8(r3) + or r7,r7,r6 +4: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 + std r12,24(r3) + beqlr + cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail + ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f + lbz r0,0(r4) + stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r4 + sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f + lwzx r0,r7,r4 + stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f + lwz r0,0(r4) + lwz r9,4(r4) + addi r4,r4,8 + stw r0,0(r3) + stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f + lwz r0,0(r4) + addi r4,r4,4 + stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f + lhz r0,0(r4) + addi r4,r4,2 + sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f + lbz r0,0(r4) + stb r0,0(r3) +4: blr diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c deleted file mode 100644 index 5eea6f3c1e0..00000000000 --- a/arch/powerpc/lib/usercopy.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Functions which are too large to be inlined. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include - -unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} - -unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_WRITE, to, n))) - n = __copy_to_user(to, from, n); - return n; -} - -unsigned long copy_in_user(void __user *to, const void __user *from, - unsigned long n) -{ - might_sleep(); - if (likely(access_ok(VERIFY_READ, from, n) && - access_ok(VERIFY_WRITE, to, n))) - n =__copy_tofrom_user(to, from, n); - return n; -} - -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(copy_in_user); - diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c new file mode 100644 index 00000000000..5eea6f3c1e0 --- /dev/null +++ b/arch/powerpc/lib/usercopy_64.c @@ -0,0 +1,41 @@ +/* + * Functions which are too large to be inlined. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
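The usercopy routines being renamed here share one return convention worth noting: each returns the number of bytes that could not be copied, and copy_from_user() zeroes the destination when the access_ok() check fails so the kernel buffer never holds stale data. A minimal caller sketch, assuming the usual uaccess declarations (fetch_from_user, kbuf, ubuf and len are hypothetical names):

    #include <linux/errno.h>
    #include <asm/uaccess.h>

    /* Any nonzero return from copy_from_user() means a partial or
     * failed copy, which callers conventionally map to -EFAULT. */
    static long fetch_from_user(void *kbuf, const void __user *ubuf,
                                unsigned long len)
    {
            if (copy_from_user(kbuf, ubuf, len))
                    return -EFAULT;
            return 0;
    }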
+ */ +#include +#include + +unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_to_user(to, from, n); + return n; +} + +unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n) +{ + might_sleep(); + if (likely(access_ok(VERIFY_READ, from, n) && + access_ok(VERIFY_WRITE, to, n))) + n =__copy_tofrom_user(to, from, n); + return n; +} + +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_in_user); + diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index afd3be112b7..35497deeb4b 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,10 +3,10 @@ # obj-y := fault.o mem.o lmb.o -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - tlb.o -obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ + tlb_32.o +obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S deleted file mode 100644 index 57278a8dd13..00000000000 --- a/arch/powerpc/mm/hash_32.S +++ /dev/null @@ -1,618 +0,0 @@ -/* - * arch/ppc/kernel/hashtable.S - * - * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * - * This file contains low-level assembler routines for managing - * the PowerPC MMU hash table. (PPC 8xx processors don't use a - * hash table, so this file is not used on them.) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP - .comm mmu_hash_lock,4 -#endif /* CONFIG_SMP */ - -/* - * Sync CPUs with hash_page taking & releasing the hash - * table lock - */ -#ifdef CONFIG_SMP - .text -_GLOBAL(hash_page_sync) - lis r8,mmu_hash_lock@h - ori r8,r8,mmu_hash_lock@l - lis r0,0x0fff - b 10f -11: lwz r6,0(r8) - cmpwi 0,r6,0 - bne 11b -10: lwarx r6,0,r8 - cmpwi 0,r6,0 - bne- 11b - stwcx. r0,0,r8 - bne- 10b - isync - eieio - li r0,0 - stw r0,0(r8) - blr -#endif - -/* - * Load a PTE into the hash table, if possible. - * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. - * r9 contains the SRR1 value, from which we use the MSR_PR bit. - * SPRG3 contains the physical address of the current task's thread. - * - * Returns to the caller if the access is illegal or there is no - * mapping for the address. 
Otherwise it places an appropriate PTE - * in the hash table and returns from the exception. - * Uses r0, r3 - r8, ctr, lr. - */ - .text -_GLOBAL(hash_page) -#ifdef CONFIG_PPC64BRIDGE - mfmsr r0 - clrldi r0,r0,1 /* make sure it's in 32-bit mode */ - MTMSRD(r0) - isync -#endif - tophys(r7,0) /* gets -KERNELBASE into r7 */ -#ifdef CONFIG_SMP - addis r8,r7,mmu_hash_lock@h - ori r8,r8,mmu_hash_lock@l - lis r0,0x0fff - b 10f -11: lwz r6,0(r8) - cmpwi 0,r6,0 - bne 11b -10: lwarx r6,0,r8 - cmpwi 0,r6,0 - bne- 11b - stwcx. r0,0,r8 - bne- 10b - isync -#endif - /* Get PTE (linux-style) and check access */ - lis r0,KERNELBASE@h /* check if kernel address */ - cmplw 0,r4,r0 - mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ - ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - lwz r5,PGDIR(r8) /* virt page-table root */ - blt+ 112f /* assume user more likely */ - lis r5,swapper_pg_dir@ha /* if kernel address, use */ - addi r5,r5,swapper_pg_dir@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: add r5,r5,r7 /* convert to phys addr */ - rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ - lwz r8,0(r5) /* get pmd entry */ - rlwinm. r8,r8,0,0,19 /* extract address of pte page */ -#ifdef CONFIG_SMP - beq- hash_page_out /* return if no mapping */ -#else - /* XXX it seems like the 601 will give a machine fault on the - rfi if its alignment is wrong (bottom 4 bits of address are - 8 or 0xc) and we have had a not-taken conditional branch - to the address following the rfi. */ - beqlr- -#endif - rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE - - /* - * Update the linux PTE atomically. We do the lwarx up-front - * because almost always, there won't be a permission violation - * and there won't already be an HPTE, and thus we will have - * to update the PTE to set _PAGE_HASHPTE. -- paulus. - */ -retry: - lwarx r6,0,r8 /* get linux-style pte */ - andc. r5,r3,r6 /* check access & ~permission */ -#ifdef CONFIG_SMP - bne- hash_page_out /* return if access not permitted */ -#else - bnelr- -#endif - or r5,r0,r6 /* set accessed/dirty bits */ - stwcx. r5,0,r8 /* attempt to update PTE */ - bne- retry /* retry if someone got there first */ - - mfsrin r3,r4 /* get segment reg for segment */ - mfctr r0 - stw r0,_CTR(r11) - bl create_hpte /* add the hash table entry */ - -#ifdef CONFIG_SMP - eieio - addis r8,r7,mmu_hash_lock@ha - li r0,0 - stw r0,mmu_hash_lock@l(r8) -#endif - - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r7,GPR7(r11) - lwz r8,GPR8(r11) - b fast_exception_return - -#ifdef CONFIG_SMP -hash_page_out: - eieio - addis r8,r7,mmu_hash_lock@ha - li r0,0 - stw r0,mmu_hash_lock@l(r8) - blr -#endif /* CONFIG_SMP */ - -/* - * Add an entry for a particular page to the hash table. - * - * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) - * - * We assume any necessary modifications to the pte (e.g. setting - * the accessed bit) have already been done and that there is actually - * a hash table in use (i.e. we're not on a 603). 
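The context-to-VSID mapping used by add_hash_page (and again by flush_hash_pages below) is a pair of multiplies by fixed skew constants. A small C sketch of what the mulli/rlwinm/mulli/add sequence computes (vsid_sketch is a hypothetical name; create_hpte later keeps only 24 bits of the result):

    #include <stdint.h>

    static uint32_t vsid_sketch(uint32_t context, uint32_t va)
    {
            uint32_t esid = va >> 28;   /* top 4 bits of the address */

            return context * (897 * 16) + esid * 0x111;
    }

The skew constants spread consecutive contexts and segments across the hash space, so neighbouring address spaces do not all collide in the same PTEGs.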
- */ -_GLOBAL(add_hash_page) - mflr r0 - stw r0,4(r1) - - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - -#ifdef CONFIG_SMP - rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ - lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ - oris r8,r8,12 -#endif /* CONFIG_SMP */ - - /* - * We disable interrupts here, even on UP, because we don't - * want to race with hash_page, and because we want the - * _PAGE_HASHPTE bit to be a reliable indication of whether - * the HPTE exists (or at least whether one did once). - * We also turn off the MMU for data accesses so that we - * we can't take a hash table miss (assuming the code is - * covered by a BAT). -- paulus - */ - mfmsr r10 - SYNC - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear MSR_DR */ - mtmsr r0 - SYNC_601 - isync - - tophys(r7,0) - -#ifdef CONFIG_SMP - addis r9,r7,mmu_hash_lock@ha - addi r9,r9,mmu_hash_lock@l -10: lwarx r0,0,r9 /* take the mmu_hash_lock */ - cmpi 0,r0,0 - bne- 11f - stwcx. r8,0,r9 - beq+ 12f -11: lwz r0,0(r9) - cmpi 0,r0,0 - beq 10b - b 11b -12: isync -#endif - - /* - * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. - * If _PAGE_HASHPTE was already set, we don't replace the existing - * HPTE, so we just unlock and return. - */ - mr r8,r5 - rlwimi r8,r4,22,20,29 -1: lwarx r6,0,r8 - andi. r0,r6,_PAGE_HASHPTE - bne 9f /* if HASHPTE already set, done */ - ori r5,r6,_PAGE_HASHPTE - stwcx. r5,0,r8 - bne- 1b - - bl create_hpte - -9: -#ifdef CONFIG_SMP - eieio - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ -#endif - - /* reenable interrupts and DR */ - mtmsr r10 - SYNC_601 - isync - - lwz r0,4(r1) - mtlr r0 - blr - -/* - * This routine adds a hardware PTE to the hash table. - * It is designed to be called with the MMU either on or off. - * r3 contains the VSID, r4 contains the virtual address, - * r5 contains the linux PTE, r6 contains the old value of the - * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the - * offset to be added to addresses (0 if the MMU is on, - * -KERNELBASE if it is off). - * On SMP, the caller should have the mmu_hash_lock held. - * We assume that the caller has (or will) set the _PAGE_HASHPTE - * bit in the linux PTE in memory. The value passed in r6 should - * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set - * this routine will skip the search for an existing HPTE. - * This procedure modifies r0, r3 - r6, r8, cr0. - * -- paulus. - * - * For speed, 4 of the instructions get patched once the size and - * physical address of the hash table are known. These definitions - * of Hash_base and Hash_bits below are just an example. - */ -Hash_base = 0xc0180000 -Hash_bits = 12 /* e.g. 256kB hash table */ -Hash_msk = (((1 << Hash_bits) - 1) * 64) - -#ifndef CONFIG_PPC64BRIDGE -/* defines for the PTE format for 32-bit PPCs */ -#define PTE_SIZE 8 -#define PTEG_SIZE 64 -#define LG_PTEG_SIZE 6 -#define LDPTEu lwzu -#define STPTE stw -#define CMPPTE cmpw -#define PTE_H 0x40 -#define PTE_V 0x80000000 -#define TST_V(r) rlwinm. 
r,r,0,0,0 -#define SET_V(r) oris r,r,PTE_V@h -#define CLR_V(r,t) rlwinm r,r,0,1,31 - -#else -/* defines for the PTE format for 64-bit PPCs */ -#define PTE_SIZE 16 -#define PTEG_SIZE 128 -#define LG_PTEG_SIZE 7 -#define LDPTEu ldu -#define STPTE std -#define CMPPTE cmpd -#define PTE_H 2 -#define PTE_V 1 -#define TST_V(r) andi. r,r,PTE_V -#define SET_V(r) ori r,r,PTE_V -#define CLR_V(r,t) li t,PTE_V; andc r,r,t -#endif /* CONFIG_PPC64BRIDGE */ - -#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) -#define HASH_RIGHT 31-LG_PTEG_SIZE - -_GLOBAL(create_hpte) - /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r8,r8,0xe14 /* clear out reserved bits and M */ - andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ -BEGIN_FTR_SECTION - ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ -END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) - - /* Construct the high word of the PPC-style PTE (r5) */ -#ifndef CONFIG_PPC64BRIDGE - rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ - rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r5,r3,12 /* shift vsid into position */ - rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ - SET_V(r5) /* set V (valid) bit */ - - /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(hash_page_patch_A) - addis r0,r7,Hash_base@h /* base address of hash table */ - rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ - xor r3,r3,r0 /* make primary hash */ - li r0,8 /* PTEs/group */ - - /* - * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search - * if it is clear, meaning that the HPTE isn't there already... - */ - andi. 
r6,r6,_PAGE_HASHPTE - beq+ 10f /* no PTE: go look for an empty slot */ - tlbie r4 - - addis r4,r7,htab_hash_searches@ha - lwz r6,htab_hash_searches@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6,htab_hash_searches@l(r4) - - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ - mtctr r0 - addi r4,r3,-PTE_SIZE -1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ - CMPPTE 0,r6,r5 - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_slot - - /* Search the secondary PTEG for a matching PTE */ - ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_B) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ - xori r4,r4,(-PTEG_SIZE & 0xffff) - addi r4,r4,-PTE_SIZE - mtctr r0 -2: LDPTEu r6,PTE_SIZE(r4) - CMPPTE 0,r6,r5 - bdnzf 2,2b - beq+ found_slot - xori r5,r5,PTE_H /* clear H bit again */ - - /* Search the primary PTEG for an empty slot */ -10: mtctr r0 - addi r4,r3,-PTE_SIZE /* search primary PTEG */ -1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ - TST_V(r6) /* test valid bit */ - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_empty - - /* update counter of times that the primary PTEG is full */ - addis r4,r7,primary_pteg_full@ha - lwz r6,primary_pteg_full@l(r4) - addi r6,r6,1 - stw r6,primary_pteg_full@l(r4) - - /* Search the secondary PTEG for an empty slot */ - ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_C) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ - xori r4,r4,(-PTEG_SIZE & 0xffff) - addi r4,r4,-PTE_SIZE - mtctr r0 -2: LDPTEu r6,PTE_SIZE(r4) - TST_V(r6) - bdnzf 2,2b - beq+ found_empty - xori r5,r5,PTE_H /* clear H bit again */ - - /* - * Choose an arbitrary slot in the primary PTEG to overwrite. - * Since both the primary and secondary PTEGs are full, and we - * have no information that the PTEs in the primary PTEG are - * more important or useful than those in the secondary PTEG, - * and we know there is a definite (although small) speed - * advantage to putting the PTE in the primary PTEG, we always - * put the PTE in the primary PTEG. - */ - addis r4,r7,next_slot@ha - lwz r6,next_slot@l(r4) - addi r6,r6,PTE_SIZE - andi. r6,r6,7*PTE_SIZE - stw r6,next_slot@l(r4) - add r4,r3,r6 - -#ifndef CONFIG_SMP - /* Store PTE in PTEG */ -found_empty: - STPTE r5,0(r4) -found_slot: - STPTE r8,PTE_SIZE/2(r4) - -#else /* CONFIG_SMP */ -/* - * Between the tlbie above and updating the hash table entry below, - * another CPU could read the hash table entry and put it in its TLB. - * There are 3 cases: - * 1. using an empty slot - * 2. updating an earlier entry to change permissions (i.e. enable write) - * 3. taking over the PTE for an unrelated address - * - * In each case it doesn't really matter if the other CPUs have the old - * PTE in their TLB. So we don't need to bother with another tlbie here, - * which is convenient as we've overwritten the register that had the - * address. :-) The tlbie above is mainly to make sure that this CPU comes - * and gets the new PTE from the hash table. - * - * We do however have to make sure that the PTE is never in an invalid - * state with the V bit set. 
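The invariant described above translates into a strict store ordering: the valid bit is set only once both words of the HPTE are consistent. A C-level sketch of the sequence the SMP found_empty/found_slot path performs (hpte_update_sketch and the generic compiler barrier are illustrative stand-ins for the STPTE/sync/TLBSYNC steps; PTE_V is 0x80000000 in the 32-bit format defined above):

    #include <stdint.h>

    #define PTE_V_BIT 0x80000000u   /* V bit of the 32-bit HPTE */

    static void hpte_update_sketch(volatile uint32_t *slot,
                                   uint32_t hi, uint32_t lo)
    {
            slot[0] = hi & ~PTE_V_BIT;  /* write high word with V clear */
            __sync_synchronize();       /* sync; TLBSYNC in the asm */
            slot[1] = lo;               /* RPN, WIMG and PP bits */
            __sync_synchronize();       /* sync */
            slot[0] = hi | PTE_V_BIT;   /* finally set V */
    }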
- */ -found_empty: -found_slot: - CLR_V(r5,r0) /* clear V (valid) bit in PTE */ - STPTE r5,0(r4) - sync - TLBSYNC - STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ - sync - SET_V(r5) - STPTE r5,0(r4) /* finally set V bit in PTE */ -#endif /* CONFIG_SMP */ - - sync /* make sure pte updates get to memory */ - blr - - .comm next_slot,4 - .comm primary_pteg_full,4 - .comm htab_hash_searches,4 - -/* - * Flush the entry for a particular page from the hash table. - * - * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, - * int count) - * - * We assume that there is a hash table in use (Hash != 0). - */ -_GLOBAL(flush_hash_pages) - tophys(r7,0) - - /* - * We disable interrupts here, even on UP, because we want - * the _PAGE_HASHPTE bit to be a reliable indication of - * whether the HPTE exists (or at least whether one did once). - * We also turn off the MMU for data accesses so that we - * we can't take a hash table miss (assuming the code is - * covered by a BAT). -- paulus - */ - mfmsr r10 - SYNC - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear MSR_DR */ - mtmsr r0 - SYNC_601 - isync - - /* First find a PTE in the range that has _PAGE_HASHPTE set */ - rlwimi r5,r4,22,20,29 -1: lwz r0,0(r5) - cmpwi cr1,r6,1 - andi. r0,r0,_PAGE_HASHPTE - bne 2f - ble cr1,19f - addi r4,r4,0x1000 - addi r5,r5,4 - addi r6,r6,-1 - b 1b - - /* Convert context and va to VSID */ -2: mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note code below trims to 24 bits */ - - /* Construct the high word of the PPC-style PTE (r11) */ -#ifndef CONFIG_PPC64BRIDGE - rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ - rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r11,r3,12 /* shift vsid into position */ - rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ - SET_V(r11) /* set V (valid) bit */ - -#ifdef CONFIG_SMP - addis r9,r7,mmu_hash_lock@ha - addi r9,r9,mmu_hash_lock@l - rlwinm r8,r1,0,0,18 - add r8,r8,r7 - lwz r8,TI_CPU(r8) - oris r8,r8,9 -10: lwarx r0,0,r9 - cmpi 0,r0,0 - bne- 11f - stwcx. r8,0,r9 - beq+ 12f -11: lwz r0,0(r9) - cmpi 0,r0,0 - beq 10b - b 11b -12: isync -#endif - - /* - * Check the _PAGE_HASHPTE bit in the linux PTE. If it is - * already clear, we're done (for this pte). If not, - * clear it (atomically) and proceed. -- paulus. - */ -33: lwarx r8,0,r5 /* fetch the pte */ - andi. r0,r8,_PAGE_HASHPTE - beq 8f /* done if HASHPTE is already clear */ - rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ - stwcx. 
r8,0,r5 /* update the pte */ - bne- 33b - - /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(flush_hash_patch_A) - addis r8,r7,Hash_base@h /* base address of hash table */ - rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ - xor r8,r0,r8 /* make primary hash */ - - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ - li r0,8 /* PTEs/group */ - mtctr r0 - addi r12,r8,-PTE_SIZE -1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ - CMPPTE 0,r0,r11 - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ 3f - - /* Search the secondary PTEG for a matching PTE */ - ori r11,r11,PTE_H /* set H (secondary hash) bit */ - li r0,8 /* PTEs/group */ -_GLOBAL(flush_hash_patch_B) - xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ - xori r12,r12,(-PTEG_SIZE & 0xffff) - addi r12,r12,-PTE_SIZE - mtctr r0 -2: LDPTEu r0,PTE_SIZE(r12) - CMPPTE 0,r0,r11 - bdnzf 2,2b - xori r11,r11,PTE_H /* clear H again */ - bne- 4f /* should rarely fail to find it */ - -3: li r0,0 - STPTE r0,0(r12) /* invalidate entry */ -4: sync - tlbie r4 /* in hw tlb too */ - sync - -8: ble cr1,9f /* if all ptes checked */ -81: addi r6,r6,-1 - addi r5,r5,4 /* advance to next pte */ - addi r4,r4,0x1000 - lwz r0,0(r5) /* check next pte */ - cmpwi cr1,r6,1 - andi. r0,r0,_PAGE_HASHPTE - bne 33b - bgt cr1,81b - -9: -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ -#endif - -19: mtmsr r10 - SYNC_601 - isync - blr diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S new file mode 100644 index 00000000000..57278a8dd13 --- /dev/null +++ b/arch/powerpc/mm/hash_low_32.S @@ -0,0 +1,618 @@ +/* + * arch/ppc/kernel/hashtable.S + * + * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * This file contains low-level assembler routines for managing + * the PowerPC MMU hash table. (PPC 8xx processors don't use a + * hash table, so this file is not used on them.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + .comm mmu_hash_lock,4 +#endif /* CONFIG_SMP */ + +/* + * Sync CPUs with hash_page taking & releasing the hash + * table lock + */ +#ifdef CONFIG_SMP + .text +_GLOBAL(hash_page_sync) + lis r8,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync + eieio + li r0,0 + stw r0,0(r8) + blr +#endif + +/* + * Load a PTE into the hash table, if possible. + * The address is in r4, and r3 contains an access flag: + * _PAGE_RW (0x400) if a write. + * r9 contains the SRR1 value, from which we use the MSR_PR bit. + * SPRG3 contains the physical address of the current task's thread. + * + * Returns to the caller if the access is illegal or there is no + * mapping for the address. 
Otherwise it places an appropriate PTE + * in the hash table and returns from the exception. + * Uses r0, r3 - r8, ctr, lr. + */ + .text +_GLOBAL(hash_page) +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + MTMSRD(r0) + isync +#endif + tophys(r7,0) /* gets -KERNELBASE into r7 */ +#ifdef CONFIG_SMP + addis r8,r7,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync +#endif + /* Get PTE (linux-style) and check access */ + lis r0,KERNELBASE@h /* check if kernel address */ + cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ + ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ + lwz r5,PGDIR(r8) /* virt page-table root */ + blt+ 112f /* assume user more likely */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +112: add r5,r5,r7 /* convert to phys addr */ + rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ + lwz r8,0(r5) /* get pmd entry */ + rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#ifdef CONFIG_SMP + beq- hash_page_out /* return if no mapping */ +#else + /* XXX it seems like the 601 will give a machine fault on the + rfi if its alignment is wrong (bottom 4 bits of address are + 8 or 0xc) and we have had a not-taken conditional branch + to the address following the rfi. */ + beqlr- +#endif + rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE + + /* + * Update the linux PTE atomically. We do the lwarx up-front + * because almost always, there won't be a permission violation + * and there won't already be an HPTE, and thus we will have + * to update the PTE to set _PAGE_HASHPTE. -- paulus. + */ +retry: + lwarx r6,0,r8 /* get linux-style pte */ + andc. r5,r3,r6 /* check access & ~permission */ +#ifdef CONFIG_SMP + bne- hash_page_out /* return if access not permitted */ +#else + bnelr- +#endif + or r5,r0,r6 /* set accessed/dirty bits */ + stwcx. r5,0,r8 /* attempt to update PTE */ + bne- retry /* retry if someone got there first */ + + mfsrin r3,r4 /* get segment reg for segment */ + mfctr r0 + stw r0,_CTR(r11) + bl create_hpte /* add the hash table entry */ + +#ifdef CONFIG_SMP + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) +#endif + + /* Return from the exception */ + lwz r5,_CTR(r11) + mtctr r5 + lwz r0,GPR0(r11) + lwz r7,GPR7(r11) + lwz r8,GPR8(r11) + b fast_exception_return + +#ifdef CONFIG_SMP +hash_page_out: + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) + blr +#endif /* CONFIG_SMP */ + +/* + * Add an entry for a particular page to the hash table. + * + * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) + * + * We assume any necessary modifications to the pte (e.g. setting + * the accessed bit) have already been done and that there is actually + * a hash table in use (i.e. we're not on a 603). 
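hash_page updates the Linux PTE under a single lwarx/stwcx. reservation, so the permission check and the setting of the accessed, dirty and _PAGE_HASHPTE bits happen atomically with respect to other CPUs. A C sketch of that retry loop, using compiler atomics in place of the reservation instructions (pte_update_sketch, access and set_bits are illustrative names):

    #include <stdint.h>

    static int pte_update_sketch(uint32_t *ptep, uint32_t access,
                                 uint32_t set_bits)
    {
            uint32_t old, new;

            do {
                    old = __atomic_load_n(ptep, __ATOMIC_RELAXED);
                    if (access & ~old)      /* the asm's andc. test */
                            return -1;      /* permission missing */
                    new = old | set_bits;   /* accessed/dirty/hashpte */
            } while (!__atomic_compare_exchange_n(ptep, &old, new, 0,
                                                  __ATOMIC_RELAXED,
                                                  __ATOMIC_RELAXED));
            return 0;
    }

Doing the lwarx up front is the optimisation the comment in hash_page calls out: in the common case there is no permission violation and no existing HPTE, so the PTE almost always needs the _PAGE_HASHPTE update anyway.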
+ */ +_GLOBAL(add_hash_page) + mflr r0 + stw r0,4(r1) + + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + oris r8,r8,12 +#endif /* CONFIG_SMP */ + + /* + * We disable interrupts here, even on UP, because we don't + * want to race with hash_page, and because we want the + * _PAGE_HASHPTE bit to be a reliable indication of whether + * the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + tophys(r7,0) + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l +10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. + * If _PAGE_HASHPTE was already set, we don't replace the existing + * HPTE, so we just unlock and return. + */ + mr r8,r5 + rlwimi r8,r4,22,20,29 +1: lwarx r6,0,r8 + andi. r0,r6,_PAGE_HASHPTE + bne 9f /* if HASHPTE already set, done */ + ori r5,r6,_PAGE_HASHPTE + stwcx. r5,0,r8 + bne- 1b + + bl create_hpte + +9: +#ifdef CONFIG_SMP + eieio + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + + /* reenable interrupts and DR */ + mtmsr r10 + SYNC_601 + isync + + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This routine adds a hardware PTE to the hash table. + * It is designed to be called with the MMU either on or off. + * r3 contains the VSID, r4 contains the virtual address, + * r5 contains the linux PTE, r6 contains the old value of the + * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the + * offset to be added to addresses (0 if the MMU is on, + * -KERNELBASE if it is off). + * On SMP, the caller should have the mmu_hash_lock held. + * We assume that the caller has (or will) set the _PAGE_HASHPTE + * bit in the linux PTE in memory. The value passed in r6 should + * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set + * this routine will skip the search for an existing HPTE. + * This procedure modifies r0, r3 - r6, r8, cr0. + * -- paulus. + * + * For speed, 4 of the instructions get patched once the size and + * physical address of the hash table are known. These definitions + * of Hash_base and Hash_bits below are just an example. + */ +Hash_base = 0xc0180000 +Hash_bits = 12 /* e.g. 256kB hash table */ +Hash_msk = (((1 << Hash_bits) - 1) * 64) + +#ifndef CONFIG_PPC64BRIDGE +/* defines for the PTE format for 32-bit PPCs */ +#define PTE_SIZE 8 +#define PTEG_SIZE 64 +#define LG_PTEG_SIZE 6 +#define LDPTEu lwzu +#define STPTE stw +#define CMPPTE cmpw +#define PTE_H 0x40 +#define PTE_V 0x80000000 +#define TST_V(r) rlwinm. 
r,r,0,0,0 +#define SET_V(r) oris r,r,PTE_V@h +#define CLR_V(r,t) rlwinm r,r,0,1,31 + +#else +/* defines for the PTE format for 64-bit PPCs */ +#define PTE_SIZE 16 +#define PTEG_SIZE 128 +#define LG_PTEG_SIZE 7 +#define LDPTEu ldu +#define STPTE std +#define CMPPTE cmpd +#define PTE_H 2 +#define PTE_V 1 +#define TST_V(r) andi. r,r,PTE_V +#define SET_V(r) ori r,r,PTE_V +#define CLR_V(r,t) li t,PTE_V; andc r,r,t +#endif /* CONFIG_PPC64BRIDGE */ + +#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) +#define HASH_RIGHT 31-LG_PTEG_SIZE + +_GLOBAL(create_hpte) + /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r8,r8,0xe14 /* clear out reserved bits and M */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + + /* Construct the high word of the PPC-style PTE (r5) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r5,r3,12 /* shift vsid into position */ + rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r5) /* set V (valid) bit */ + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(hash_page_patch_A) + addis r0,r7,Hash_base@h /* base address of hash table */ + rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r3,r3,r0 /* make primary hash */ + li r0,8 /* PTEs/group */ + + /* + * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search + * if it is clear, meaning that the HPTE isn't there already... + */ + andi. 
r6,r6,_PAGE_HASHPTE + beq+ 10f /* no PTE: go look for an empty slot */ + tlbie r4 + + addis r4,r7,htab_hash_searches@ha + lwz r6,htab_hash_searches@l(r4) + addi r6,r6,1 /* count how many searches we do */ + stw r6,htab_hash_searches@l(r4) + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + mtctr r0 + addi r4,r3,-PTE_SIZE +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + CMPPTE 0,r6,r5 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_slot + + /* Search the secondary PTEG for a matching PTE */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_B) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + CMPPTE 0,r6,r5 + bdnzf 2,2b + beq+ found_slot + xori r5,r5,PTE_H /* clear H bit again */ + + /* Search the primary PTEG for an empty slot */ +10: mtctr r0 + addi r4,r3,-PTE_SIZE /* search primary PTEG */ +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + TST_V(r6) /* test valid bit */ + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_empty + + /* update counter of times that the primary PTEG is full */ + addis r4,r7,primary_pteg_full@ha + lwz r6,primary_pteg_full@l(r4) + addi r6,r6,1 + stw r6,primary_pteg_full@l(r4) + + /* Search the secondary PTEG for an empty slot */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_C) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + TST_V(r6) + bdnzf 2,2b + beq+ found_empty + xori r5,r5,PTE_H /* clear H bit again */ + + /* + * Choose an arbitrary slot in the primary PTEG to overwrite. + * Since both the primary and secondary PTEGs are full, and we + * have no information that the PTEs in the primary PTEG are + * more important or useful than those in the secondary PTEG, + * and we know there is a definite (although small) speed + * advantage to putting the PTE in the primary PTEG, we always + * put the PTE in the primary PTEG. + */ + addis r4,r7,next_slot@ha + lwz r6,next_slot@l(r4) + addi r6,r6,PTE_SIZE + andi. r6,r6,7*PTE_SIZE + stw r6,next_slot@l(r4) + add r4,r3,r6 + +#ifndef CONFIG_SMP + /* Store PTE in PTEG */ +found_empty: + STPTE r5,0(r4) +found_slot: + STPTE r8,PTE_SIZE/2(r4) + +#else /* CONFIG_SMP */ +/* + * Between the tlbie above and updating the hash table entry below, + * another CPU could read the hash table entry and put it in its TLB. + * There are 3 cases: + * 1. using an empty slot + * 2. updating an earlier entry to change permissions (i.e. enable write) + * 3. taking over the PTE for an unrelated address + * + * In each case it doesn't really matter if the other CPUs have the old + * PTE in their TLB. So we don't need to bother with another tlbie here, + * which is convenient as we've overwritten the register that had the + * address. :-) The tlbie above is mainly to make sure that this CPU comes + * and gets the new PTE from the hash table. + * + * We do however have to make sure that the PTE is never in an invalid + * state with the V bit set. 
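
The store ordering implemented just below is easier to see in C. The following is only an illustrative sketch of that protocol; the struct and function are invented for the example, with the two words standing for the two halves of a hash PTE:

	/* Sketch of the V-bit-safe HPTE overwrite order used below. */
	struct hpte_sketch {
		unsigned long v_word;	/* VSID, API and the V (valid) bit */
		unsigned long rpn_word;	/* RPN, WIMG and PP bits */
	};

	static void hpte_overwrite(volatile struct hpte_sketch *h,
				   unsigned long v, unsigned long r)
	{
		h->v_word = v & ~PTE_V;		/* 1: invalidate the slot */
		asm volatile("sync");		/* order it before the update */
		/* TLBSYNC here, so no CPU still uses the old translation */
		h->rpn_word = r;		/* 2: update while invalid */
		asm volatile("sync");
		h->v_word = v | PTE_V;		/* 3: set V only when consistent */
	}
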
+ */
+found_empty:
+found_slot:
+ CLR_V(r5,r0) /* clear V (valid) bit in PTE */
+ STPTE r5,0(r4)
+ sync
+ TLBSYNC
+ STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
+ sync
+ SET_V(r5)
+ STPTE r5,0(r4) /* finally set V bit in PTE */
+#endif /* CONFIG_SMP */
+
+ sync /* make sure pte updates get to memory */
+ blr
+
+ .comm next_slot,4
+ .comm primary_pteg_full,4
+ .comm htab_hash_searches,4
+
+/*
+ * Flush the entry for a particular page from the hash table.
+ *
+ * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
+ * int count)
+ *
+ * We assume that there is a hash table in use (Hash != 0).
+ */
+_GLOBAL(flush_hash_pages)
+ tophys(r7,0)
+
+ /*
+ * We disable interrupts here, even on UP, because we want
+ * the _PAGE_HASHPTE bit to be a reliable indication of
+ * whether the HPTE exists (or at least whether one did once).
+ * We also turn off the MMU for data accesses so that we
+ * can't take a hash table miss (assuming the code is
+ * covered by a BAT). -- paulus
+ */
+ mfmsr r10
+ SYNC
+ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
+ rlwinm r0,r0,0,28,26 /* clear MSR_DR */
+ mtmsr r0
+ SYNC_601
+ isync
+
+ /* First find a PTE in the range that has _PAGE_HASHPTE set */
+ rlwimi r5,r4,22,20,29
+1: lwz r0,0(r5)
+ cmpwi cr1,r6,1
+ andi. r0,r0,_PAGE_HASHPTE
+ bne 2f
+ ble cr1,19f
+ addi r4,r4,0x1000
+ addi r5,r5,4
+ addi r6,r6,-1
+ b 1b
+
+ /* Convert context and va to VSID */
+2: mulli r3,r3,897*16 /* multiply context by context skew */
+ rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
+ mulli r0,r0,0x111 /* multiply by ESID skew */
+ add r3,r3,r0 /* note code below trims to 24 bits */
+
+ /* Construct the high word of the PPC-style PTE (r11) */
+#ifndef CONFIG_PPC64BRIDGE
+ rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */
+ rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */
+#else /* CONFIG_PPC64BRIDGE */
+ clrlwi r3,r3,8 /* reduce vsid to 24 bits */
+ sldi r11,r3,12 /* shift vsid into position */
+ rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64BRIDGE */
+ SET_V(r11) /* set V (valid) bit */
+
+#ifdef CONFIG_SMP
+ addis r9,r7,mmu_hash_lock@ha
+ addi r9,r9,mmu_hash_lock@l
+ rlwinm r8,r1,0,0,18
+ add r8,r8,r7
+ lwz r8,TI_CPU(r8)
+ oris r8,r8,9
+10: lwarx r0,0,r9
+ cmpi 0,r0,0
+ bne- 11f
+ stwcx. r8,0,r9
+ beq+ 12f
+11: lwz r0,0(r9)
+ cmpi 0,r0,0
+ beq 10b
+ b 11b
+12: isync
+#endif
+
+ /*
+ * Check the _PAGE_HASHPTE bit in the linux PTE. If it is
+ * already clear, we're done (for this pte). If not,
+ * clear it (atomically) and proceed. -- paulus.
+ */
+33: lwarx r8,0,r5 /* fetch the pte */
+ andi. r0,r8,_PAGE_HASHPTE
+ beq 8f /* done if HASHPTE is already clear */
+ rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
+ stwcx.
r8,0,r5 /* update the pte */ + bne- 33b + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(flush_hash_patch_A) + addis r8,r7,Hash_base@h /* base address of hash table */ + rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r8,r0,r8 /* make primary hash */ + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + li r0,8 /* PTEs/group */ + mtctr r0 + addi r12,r8,-PTE_SIZE +1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ + CMPPTE 0,r0,r11 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ 3f + + /* Search the secondary PTEG for a matching PTE */ + ori r11,r11,PTE_H /* set H (secondary hash) bit */ + li r0,8 /* PTEs/group */ +_GLOBAL(flush_hash_patch_B) + xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ + xori r12,r12,(-PTEG_SIZE & 0xffff) + addi r12,r12,-PTE_SIZE + mtctr r0 +2: LDPTEu r0,PTE_SIZE(r12) + CMPPTE 0,r0,r11 + bdnzf 2,2b + xori r11,r11,PTE_H /* clear H again */ + bne- 4f /* should rarely fail to find it */ + +3: li r0,0 + STPTE r0,0(r12) /* invalidate entry */ +4: sync + tlbie r4 /* in hw tlb too */ + sync + +8: ble cr1,9f /* if all ptes checked */ +81: addi r6,r6,-1 + addi r5,r5,4 /* advance to next pte */ + addi r4,r4,0x1000 + lwz r0,0(r5) /* check next pte */ + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 33b + bgt cr1,81b + +9: +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + +19: mtmsr r10 + SYNC_601 + isync + blr diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c deleted file mode 100644 index bf13c14e66b..00000000000 --- a/arch/powerpc/mm/init.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mmu_decl.h" - -#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) -/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. 
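
For concreteness: with the conventional 32-bit KERNELBASE of 0xC0000000 (an assumed value here; it is configurable), the bound stated above allows at most 768MB of lowmem:

	/* Worked example of the lowmem bound, assuming KERNELBASE = 0xC0000000. */
	/* 0xF0000000 - 0xC0000000 = 0x30000000 bytes, i.e. 768MB maximum.      */
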
*/ -#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) -#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" -#endif -#endif -#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -unsigned long total_memory; -unsigned long total_lowmem; - -unsigned long ppc_memstart; -unsigned long ppc_memoffset = PAGE_OFFSET; - -int boot_mapsize; -#ifdef CONFIG_PPC_PMAC -unsigned long agp_special_page; -#endif - -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); -#endif - -void MMU_init(void); - -/* XXX should be in current.h -- paulus */ -extern struct task_struct *current_set[NR_CPUS]; - -char *klimit = _end; -struct device_node *memory_node; - -extern int init_bootmem_done; - -/* - * this tells the system to map all of ram with the segregs - * (i.e. page tables) instead of the bats. - * -- Cort - */ -int __map_without_bats; -int __map_without_ltlbs; - -/* max amount of low RAM to map in */ -unsigned long __max_low_memory = MAX_LOW_MEM; - -/* - * limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -unsigned long __initial_memory_limit = 0x10000000; - -/* - * Check for command-line options that affect what MMU_init will do. - */ -void MMU_setup(void) -{ - /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { - __map_without_bats = 1; - } - - if (strstr(cmd_line, "noltlbs")) { - __map_without_ltlbs = 1; - } -} - -/* - * MMU_init sets up the basic memory mappings for the kernel, - * including both RAM and possibly some I/O regions, - * and sets up the page tables and the MMU hardware ready to go. - */ -void __init MMU_init(void) -{ - if (ppc_md.progress) - ppc_md.progress("MMU:enter", 0x111); - - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit = 0x01000000; - - /* parse args from command line */ - MMU_setup(); - - if (lmb.memory.cnt > 1) { - lmb.memory.cnt = 1; - lmb_analyze(); - printk(KERN_WARNING "Only using first contiguous memory region"); - } - - total_memory = lmb_end_of_DRAM(); - total_lowmem = total_memory; - -#ifdef CONFIG_FSL_BOOKE - /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB - * entries, so we need to adjust lowmem to match the amount we can map - * in the fixed entries */ - adjust_total_lowmem(); -#endif /* CONFIG_FSL_BOOKE */ - if (total_lowmem > __max_low_memory) { - total_lowmem = __max_low_memory; -#ifndef CONFIG_HIGHMEM - total_memory = total_lowmem; -#endif /* CONFIG_HIGHMEM */ - } - - /* Initialize the MMU hardware */ - if (ppc_md.progress) - ppc_md.progress("MMU:hw init", 0x300); - MMU_init_hw(); - - /* Map in all of RAM starting at KERNELBASE */ - if (ppc_md.progress) - ppc_md.progress("MMU:mapin", 0x301); - mapin_ram(); - -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; - - /* Map in I/O resources */ - if (ppc_md.progress) - ppc_md.progress("MMU:setio", 0x302); - if (ppc_md.setup_io_mappings) - ppc_md.setup_io_mappings(); - - /* Initialize the context management stuff */ - mmu_context_init(); - - if (ppc_md.progress) - ppc_md.progress("MMU:exit", 0x211); - -#ifdef CONFIG_BOOTX_TEXT - /* By default, we are no longer mapped */ - boot_text_mapped = 0; - /* Must be done last, or ppc_md.progress will die. 
*/ - map_boot_text(); -#endif -} - -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit)); - } - return p; -} - -/* Free up now-unused memory */ -static void free_sec(unsigned long start, unsigned long end, const char *name) -{ - unsigned long cnt = 0; - - while (start < end) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - cnt++; - start += PAGE_SIZE; - } - if (cnt) { - printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); - totalram_pages += cnt; - } -} - -void free_initmem(void) -{ -#define FREESEC(TYPE) \ - free_sec((unsigned long)(&__ ## TYPE ## _begin), \ - (unsigned long)(&__ ## TYPE ## _end), \ - #TYPE); - - printk ("Freeing unused kernel memory:"); - FREESEC(init); - printk("\n"); - ppc_md.progress = NULL; -#undef FREESEC -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c deleted file mode 100644 index c0ce6a7af3c..00000000000 --- a/arch/powerpc/mm/init64.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif - -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -unsigned long _SDR1=0; -unsigned long _ASR=0; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -/* - * Do very early mm setup. 
- */ -void __init mm_init_ppc64(void) -{ -#ifndef CONFIG_PPC_ISERIES - unsigned long i; -#endif - - ppc64_boot_msg(0x100, "MM Init"); - - /* This is the story of the IO hole... please, keep seated, - * unfortunately, we are out of oxygen masks at the moment. - * So we need some rough way to tell where your big IO hole - * is. On pmac, it's between 2G and 4G, on POWER3, it's around - * that area as well, on POWER4 we don't have one, etc... - * We need that as a "hint" when sizing the TCE table on POWER3 - * So far, the simplest way that seem work well enough for us it - * to just assume that the first discontinuity in our physical - * RAM layout is the IO hole. That may not be correct in the future - * (and isn't on iSeries but then we don't care ;) - */ - -#ifndef CONFIG_PPC_ISERIES - for (i = 1; i < lmb.memory.cnt; i++) { - unsigned long base, prevbase, prevsize; - - prevbase = lmb.memory.region[i-1].base; - prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].base; - if (base > (prevbase + prevsize)) { - io_hole_start = prevbase + prevsize; - io_hole_size = base - (prevbase + prevsize); - break; - } - } -#endif /* CONFIG_PPC_ISERIES */ - if (io_hole_start) - printk("IO Hole assumed to be %lx -> %lx\n", - io_hole_start, io_hole_start + io_hole_size - 1); - - ppc64_boot_msg(0x100, "MM Init Done"); -} - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, 0xcc, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk ("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -static struct kcore_list kcore_vmem; - -static int __init setup_kcore(void) -{ - int i; - - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; - struct kcore_list *kcore_mem; - - base = lmb.memory.region[i].base; - size = lmb.memory.region[i].size; - - /* GFP_ATOMIC to avoid might_sleep warnings during boot */ - kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); - if (!kcore_mem) - panic("mem_init: kmalloc failed\n"); - - kclist_add(kcore_mem, __va(base), size); - } - - kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); - - return 0; -} -module_init(setup_kcore); - -void __iomem * reserve_phb_iospace(unsigned long size) -{ - void __iomem *virt_addr; - - if (phbs_io_bot >= IMALLOC_BASE) - panic("reserve_phb_iospace(): phb io space overflow\n"); - - virt_addr = (void __iomem *) phbs_io_bot; - phbs_io_bot += size; - - return virt_addr; -} - -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - -static const int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; - -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; - -void pgtable_cache_init(void) -{ - int i; - - BUILD_BUG_ON(PTE_TABLE_SIZE != 
pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
- for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
- int size = pgtable_cache_size[i];
- const char *name = pgtable_cache_name[i];
-
- pgtable_cache[i] = kmem_cache_create(name,
- size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
- zero_ctor,
- NULL);
- if (! pgtable_cache[i])
- panic("pgtable_cache_init(): could not create %s!\n",
- name);
- }
-}
-
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
- unsigned long size, pgprot_t vma_prot)
-{
- if (ppc_md.phys_mem_access_prot)
- return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
-
- if (!page_is_ram(addr >> PAGE_SHIFT))
- vma_prot = __pgprot(pgprot_val(vma_prot)
- | _PAGE_GUARDED | _PAGE_NO_CACHE);
- return vma_prot;
-}
-EXPORT_SYMBOL(phys_mem_access_prot);
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
new file mode 100644
index 00000000000..bf13c14e66b
--- /dev/null
+++ b/arch/powerpc/mm/init_32.c
@@ -0,0 +1,258 @@
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mmu_decl.h"
+
+#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
+/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
+#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE))
+#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
+#endif
+#endif
+#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+unsigned long total_memory;
+unsigned long total_lowmem;
+
+unsigned long ppc_memstart;
+unsigned long ppc_memoffset = PAGE_OFFSET;
+
+int boot_mapsize;
+#ifdef CONFIG_PPC_PMAC
+unsigned long agp_special_page;
+#endif
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
+
+void MMU_init(void);
+
+/* XXX should be in current.h -- paulus */
+extern struct task_struct *current_set[NR_CPUS];
+
+char *klimit = _end;
+struct device_node *memory_node;
+
+extern int init_bootmem_done;
+
+/*
+ * this tells the system to map all of ram with the segregs
+ * (i.e. page tables) instead of the bats.
+ * -- Cort
+ */
+int __map_without_bats;
+int __map_without_ltlbs;
+
+/* max amount of low RAM to map in */
+unsigned long __max_low_memory = MAX_LOW_MEM;
+
+/*
+ * limit of what is accessible with initial MMU setup -
+ * 256MB usually, but only 16MB on 601.
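
Summarizing the sizing policy MMU_init applies below; this is a sketch only, with min() standing in for the explicit comparison in the code:

	/* Sketch of the lowmem sizing done in MMU_init below. */
	total_lowmem = min(total_memory, __max_low_memory);	/* cap at MAX_LOW_MEM */
	if (PVR_VER(mfspr(SPRN_PVR)) == 1)	/* 601: initial MMU covers only 16MB */
		__initial_memory_limit = 0x01000000;
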
+ */ +unsigned long __initial_memory_limit = 0x10000000; + +/* + * Check for command-line options that affect what MMU_init will do. + */ +void MMU_setup(void) +{ + /* Check for nobats option (used in mapin_ram). */ + if (strstr(cmd_line, "nobats")) { + __map_without_bats = 1; + } + + if (strstr(cmd_line, "noltlbs")) { + __map_without_ltlbs = 1; + } +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ +void __init MMU_init(void) +{ + if (ppc_md.progress) + ppc_md.progress("MMU:enter", 0x111); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + __initial_memory_limit = 0x01000000; + + /* parse args from command line */ + MMU_setup(); + + if (lmb.memory.cnt > 1) { + lmb.memory.cnt = 1; + lmb_analyze(); + printk(KERN_WARNING "Only using first contiguous memory region"); + } + + total_memory = lmb_end_of_DRAM(); + total_lowmem = total_memory; + +#ifdef CONFIG_FSL_BOOKE + /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB + * entries, so we need to adjust lowmem to match the amount we can map + * in the fixed entries */ + adjust_total_lowmem(); +#endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { + total_lowmem = __max_low_memory; +#ifndef CONFIG_HIGHMEM + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } + + /* Initialize the MMU hardware */ + if (ppc_md.progress) + ppc_md.progress("MMU:hw init", 0x300); + MMU_init_hw(); + + /* Map in all of RAM starting at KERNELBASE */ + if (ppc_md.progress) + ppc_md.progress("MMU:mapin", 0x301); + mapin_ram(); + +#ifdef CONFIG_HIGHMEM + ioremap_base = PKMAP_BASE; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Map in I/O resources */ + if (ppc_md.progress) + ppc_md.progress("MMU:setio", 0x302); + if (ppc_md.setup_io_mappings) + ppc_md.setup_io_mappings(); + + /* Initialize the context management stuff */ + mmu_context_init(); + + if (ppc_md.progress) + ppc_md.progress("MMU:exit", 0x211); + +#ifdef CONFIG_BOOTX_TEXT + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + /* Must be done last, or ppc_md.progress will die. */ + map_boot_text(); +#endif +} + +/* This is only called until mem_init is done. 
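
early_get_page() below hands out single pages from whichever allocator is usable at that point in boot. A hypothetical early caller, modeled on the 32-bit pte_alloc_one_kernel() elsewhere in this patch, would look like:

	/* Hypothetical early caller: one page for a kernel page table. */
	pte_t *pte;

	if (mem_init_done)
		pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	else {
		pte = (pte_t *)early_get_page();	/* bootmem or LMB this early */
		if (pte)
			clear_page(pte);
	}
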
*/ +void __init *early_get_page(void) +{ + void *p; + + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + __initial_memory_limit)); + } + return p; +} + +/* Free up now-unused memory */ +static void free_sec(unsigned long start, unsigned long end, const char *name) +{ + unsigned long cnt = 0; + + while (start < end) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + cnt++; + start += PAGE_SIZE; + } + if (cnt) { + printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); + totalram_pages += cnt; + } +} + +void free_initmem(void) +{ +#define FREESEC(TYPE) \ + free_sec((unsigned long)(&__ ## TYPE ## _begin), \ + (unsigned long)(&__ ## TYPE ## _end), \ + #TYPE); + + printk ("Freeing unused kernel memory:"); + FREESEC(init); + printk("\n"); + ppc_md.progress = NULL; +#undef FREESEC +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c new file mode 100644 index 00000000000..c0ce6a7af3c --- /dev/null +++ b/arch/powerpc/mm/init_64.c @@ -0,0 +1,259 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... 
please, keep seated,
+ * unfortunately, we are out of oxygen masks at the moment.
+ * So we need some rough way to tell where your big IO hole
+ * is. On pmac, it's between 2G and 4G, on POWER3, it's around
+ * that area as well, on POWER4 we don't have one, etc...
+ * We need that as a "hint" when sizing the TCE table on POWER3.
+ * So far, the simplest way that seems to work well enough for us
+ * is to just assume that the first discontinuity in our physical
+ * RAM layout is the IO hole. That may not be correct in the future
+ * (and isn't on iSeries but then we don't care ;)
+ */
+
+#ifndef CONFIG_PPC_ISERIES
+ for (i = 1; i < lmb.memory.cnt; i++) {
+ unsigned long base, prevbase, prevsize;
+
+ prevbase = lmb.memory.region[i-1].base;
+ prevsize = lmb.memory.region[i-1].size;
+ base = lmb.memory.region[i].base;
+ if (base > (prevbase + prevsize)) {
+ io_hole_start = prevbase + prevsize;
+ io_hole_size = base - (prevbase + prevsize);
+ break;
+ }
+ }
+#endif /* CONFIG_PPC_ISERIES */
+ if (io_hole_start)
+ printk("IO Hole assumed to be %lx -> %lx\n",
+ io_hole_start, io_hole_start + io_hole_size - 1);
+
+ ppc64_boot_msg(0x100, "MM Init Done");
+}
+
+void free_initmem(void)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)__init_begin;
+ for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+ memset((void *)addr, 0xcc, PAGE_SIZE);
+ ClearPageReserved(virt_to_page(addr));
+ set_page_count(virt_to_page(addr), 1);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk ("Freeing unused kernel memory: %luk freed\n",
+ ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ if (start < end)
+ printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ set_page_count(virt_to_page(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+}
+#endif
+
+static struct kcore_list kcore_vmem;
+
+static int __init setup_kcore(void)
+{
+ int i;
+
+ for (i=0; i < lmb.memory.cnt; i++) {
+ unsigned long base, size;
+ struct kcore_list *kcore_mem;
+
+ base = lmb.memory.region[i].base;
+ size = lmb.memory.region[i].size;
+
+ /* GFP_ATOMIC to avoid might_sleep warnings during boot */
+ kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
+ if (!kcore_mem)
+ panic("mem_init: kmalloc failed\n");
+
+ kclist_add(kcore_mem, __va(base), size);
+ }
+
+ kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
+
+ return 0;
+}
+module_init(setup_kcore);
+
+void __iomem * reserve_phb_iospace(unsigned long size)
+{
+ void __iomem *virt_addr;
+
+ if (phbs_io_bot >= IMALLOC_BASE)
+ panic("reserve_phb_iospace(): phb io space overflow\n");
+
+ virt_addr = (void __iomem *) phbs_io_bot;
+ phbs_io_bot += size;
+
+ return virt_addr;
+}
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+ memset(addr, 0, kmem_cache_size(cache));
+}
+
+static const int pgtable_cache_size[2] = {
+ PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
+void pgtable_cache_init(void)
+{
+ int i;
+
+ BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+ BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+ BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+ BUILD_BUG_ON(PGD_TABLE_SIZE
!= pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c deleted file mode 100644 index ef765a84433..00000000000 --- a/arch/powerpc/mm/mem64.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * This is called by /dev/mem to know if a given address has to - * be mapped non-cacheable or not - */ -int page_is_ram(unsigned long pfn) -{ - int i; - unsigned long paddr = (pfn << PAGE_SHIFT); - - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base; - - base = lmb.memory.region[i].base; - - if ((paddr >= base) && - (paddr < (base + lmb.memory.region[i].size))) { - return 1; - } - } - - return 0; -} -EXPORT_SYMBOL(page_is_ram); - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, - unsigned long size, pgprot_t vma_prot) -{ - if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); - - if (!page_is_ram(addr >> PAGE_SHIFT)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); - return vma_prot; -} -EXPORT_SYMBOL(phys_mem_access_prot); - -void show_mem(void) -{ - unsigned long total = 0, reserved = 0; - unsigned long shared = 0, cached = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat_page_nr(pgdat, i); - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - } - 
printk("%ld pages of RAM\n", total); - printk("%ld reserved pages\n", reserved); - printk("%ld pages shared\n", shared); - printk("%ld pages swap cached\n", cached); -} - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* - * We shouldnt have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &pg->flags)) - clear_bit(PG_arch_1, &pg->flags); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &pg->flags)) - clear_bit(PG_arch_1, &pg->flags); -} - -void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); -} -EXPORT_SYMBOL(flush_icache_user_range); - -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a PTE in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux PTE. 
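
The cache helpers in this file share one convention worth stating once in C: PG_arch_1 set on a page means "i-cache known clean". A sketch (the helper name is invented; the flag, feature bit and flush call are the ones used here):

	/* Lazy i-cache flush convention used by the helpers in this file. */
	static void icache_make_clean(struct page *page)
	{
		if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
			return;		/* hardware keeps the i-cache coherent */
		if (!test_bit(PG_arch_1, &page->flags)) {
			__flush_dcache_icache(page_address(page));
			set_bit(PG_arch_1, &page->flags);
		}
	}
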
- * - * This must always be called with the mm->page_table_lock held - */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, - pte_t pte) -{ - unsigned long vsid; - void *pgdir; - pte_t *ptep; - int local = 0; - cpumask_t tmp; - unsigned long flags; - - /* handle i-cache coherency */ - if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && - !cpu_has_feature(CPU_FTR_NOEXECUTE)) { - unsigned long pfn = pte_pfn(pte); - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page) - && !test_bit(PG_arch_1, &page->flags)) { - __flush_dcache_icache(page_address(page)); - set_bit(PG_arch_1, &page->flags); - } - } - } - - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ - if (!pte_young(pte)) - return; - - pgdir = vma->vm_mm->pgd; - if (pgdir == NULL) - return; - - ptep = find_linux_pte(pgdir, ea); - if (!ptep) - return; - - vsid = get_vsid(vma->vm_mm->context.id, ea); - - local_irq_save(flags); - tmp = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) - local = 1; - - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); - local_irq_restore(flags); -} diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c deleted file mode 100644 index a8816e0f6a8..00000000000 --- a/arch/powerpc/mm/mmu_context.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU substantially follows the - * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include - -#include -#include - -mm_context_t next_mmu_context; -unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -#ifdef FEW_CONTEXTS -atomic_t nr_free_contexts; -struct mm_struct *context_mm[LAST_CONTEXT+1]; -void steal_context(void); -#endif /* FEW_CONTEXTS */ - -/* - * Initialize the context management stuff. - */ -void __init -mmu_context_init(void) -{ - /* - * Some processors have too few contexts to reserve one for - * init_mm, and require using context 0 for a normal task. - * Other processors reserve the use of context zero for the kernel. - * This code assumes FIRST_CONTEXT < 32. - */ - context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_mmu_context = FIRST_CONTEXT; -#ifdef FEW_CONTEXTS - atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); -#endif /* FEW_CONTEXTS */ -} - -#ifdef FEW_CONTEXTS -/* - * Steal a context from a task that has one at the moment. - * This is only used on 8xx and 4xx and we presently assume that - * they don't do SMP. If they do then this will have to check - * whether the MM we steal is in use. 
- * We also assume that this is only used on systems that don't - * use an MMU hash table - this is true for 8xx and 4xx. - * This isn't an LRU system, it just frees up each context in - * turn (sort-of pseudo-random replacement :). This would be the - * place to implement an LRU scheme if anyone was motivated to do it. - * -- paulus - */ -void -steal_context(void) -{ - struct mm_struct *mm; - - /* free up context `next_mmu_context' */ - /* if we shouldn't free context 0, don't... */ - if (next_mmu_context < FIRST_CONTEXT) - next_mmu_context = FIRST_CONTEXT; - mm = context_mm[next_mmu_context]; - flush_tlb_mm(mm); - destroy_context(mm); -} -#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c deleted file mode 100644 index 714a84dd8d5..00000000000 --- a/arch/powerpc/mm/mmu_context64.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * MMU context allocation for 64-bit kernels. - * - * Copyright (C) 2004 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(mmu_context_lock); -static DEFINE_IDR(mmu_context_idr); - -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int index; - int err; - -again: - if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&mmu_context_lock); - err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); - spin_unlock(&mmu_context_lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_CONTEXT) { - idr_remove(&mmu_context_idr, index); - return -ENOMEM; - } - - mm->context.id = index; - - return 0; -} - -void destroy_context(struct mm_struct *mm) -{ - spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, mm->context.id); - spin_unlock(&mmu_context_lock); - - mm->context.id = NO_CONTEXT; -} diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c new file mode 100644 index 00000000000..a8816e0f6a8 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_32.c @@ -0,0 +1,86 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include + +#include +#include + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +#ifdef FEW_CONTEXTS +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; +void steal_context(void); +#endif /* FEW_CONTEXTS */ + +/* + * Initialize the context management stuff. + */ +void __init +mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +#ifdef FEW_CONTEXTS + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +#endif /* FEW_CONTEXTS */ +} + +#ifdef FEW_CONTEXTS +/* + * Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +void +steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} +#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c new file mode 100644 index 00000000000..714a84dd8d5 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_64.c @@ -0,0 +1,63 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c deleted file mode 100644 index 5792e533916..00000000000 --- a/arch/powerpc/mm/pgtable.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * This file contains the routines setting up the linux page tables. 
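
For contrast with the IDR-based 64-bit allocator above: on 32-bit, allocating from context_map is a bitmap scan. A simplified sketch, modeled on the get_mmu_context() of this era rather than on code in this patch:

	/* Simplified 32-bit context allocation against context_map. */
	ctx = next_mmu_context;
	while (test_and_set_bit(ctx, context_map)) {
		ctx = find_next_zero_bit(context_map, LAST_CONTEXT+1, ctx);
		if (ctx > LAST_CONTEXT)
			ctx = FIRST_CONTEXT;	/* wrap; FEW_CONTEXTS parts steal */
	}
	next_mmu_context = (ctx + 1) & LAST_CONTEXT;
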
- * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mmu_decl.h" - -unsigned long ioremap_base; -unsigned long ioremap_bot; -int io_bat_index; - -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) -#define HAVE_BATS 1 -#endif - -#if defined(CONFIG_FSL_BOOKE) -#define HAVE_TLBCAM 1 -#endif - -extern char etext[], _stext[]; - -#ifdef CONFIG_SMP -extern void hash_page_sync(void); -#endif - -#ifdef HAVE_BATS -extern unsigned long v_mapped_by_bats(unsigned long va); -extern unsigned long p_mapped_by_bats(unsigned long pa); -void setbat(int index, unsigned long virt, unsigned long phys, - unsigned int size, int flags); - -#else /* !HAVE_BATS */ -#define v_mapped_by_bats(x) (0UL) -#define p_mapped_by_bats(x) (0UL) -#endif /* HAVE_BATS */ - -#ifdef HAVE_TLBCAM -extern unsigned int tlbcam_index; -extern unsigned long v_mapped_by_tlbcam(unsigned long va); -extern unsigned long p_mapped_by_tlbcam(unsigned long pa); -#else /* !HAVE_TLBCAM */ -#define v_mapped_by_tlbcam(x) (0UL) -#define p_mapped_by_tlbcam(x) (0UL) -#endif /* HAVE_TLBCAM */ - -#ifdef CONFIG_PTE_64BIT -/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. 
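
The arithmetic behind that comment, spelled out as a worked example (the names are invented, not code from the patch): with 4kB pages, 8-byte PTEs mean 512 PTEs per page-table page, each covering 2MB, so a 4GB address space needs 2048 pgd pointers, which is 8kB:

	#define SKETCH_PTES_PER_PAGE	(4096 / 8)			/* 512 */
	#define SKETCH_PTE_PAGE_SPAN	(SKETCH_PTES_PER_PAGE * 4096UL)	/* 2MB */
	#define SKETCH_PGD_ENTRIES	(0x100000000ULL / SKETCH_PTE_PAGE_SPAN)	/* 2048 */
	/* 2048 entries * 4 bytes per pointer = 8kB, hence the order-1 allocation. */
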
*/ -#define PGDIR_ORDER 1 -#else -#define PGDIR_ORDER 0 -#endif - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *ret; - - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); - return ret; -} - -void pgd_free(pgd_t *pgd) -{ - free_pages((unsigned long)pgd, PGDIR_ORDER); -} - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - extern int mem_init_done; - extern void *early_get_page(void); - - if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - } else { - pte = (pte_t *)early_get_page(); - if (pte) - clear_page(pte); - } - return pte; -} - -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *ptepage; - -#ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; -#else - int flags = GFP_KERNEL | __GFP_REPEAT; -#endif - - ptepage = alloc_pages(flags, 0); - if (ptepage) - clear_highpage(ptepage); - return ptepage; -} - -void pte_free_kernel(pte_t *pte) -{ -#ifdef CONFIG_SMP - hash_page_sync(); -#endif - free_page((unsigned long)pte); -} - -void pte_free(struct page *ptepage) -{ -#ifdef CONFIG_SMP - hash_page_sync(); -#endif - __free_page(ptepage); -} - -#ifndef CONFIG_PHYS_64BIT -void __iomem * -ioremap(phys_addr_t addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE); -} -#else /* CONFIG_PHYS_64BIT */ -void __iomem * -ioremap64(unsigned long long addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE); -} - -void __iomem * -ioremap(phys_addr_t addr, unsigned long size) -{ - phys_addr_t addr64 = fixup_bigphys_addr(addr, size); - - return ioremap64(addr64, size); -} -#endif /* CONFIG_PHYS_64BIT */ - -void __iomem * -__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) -{ - unsigned long v, i; - phys_addr_t p; - int err; - - /* - * Choose an address to map it to. - * Once the vmalloc system is running, we use it. - * Before then, we use space going down from ioremap_base - * (ioremap_bot records where we're up to). - */ - p = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - p; - - /* - * If the address lies within the first 16 MB, assume it's in ISA - * memory space - */ - if (p < 16*1024*1024) - p += _ISA_MEM_BASE; - - /* - * Don't allow anybody to remap normal RAM that we're using. - * mem_init() sets high_memory so only do the check after that. - */ - if (mem_init_done && (p < virt_to_phys(high_memory))) { - printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, - __builtin_return_address(0)); - return NULL; - } - - if (size == 0) - return NULL; - - /* - * Is it already mapped? Perhaps overlapped by a previous - * BAT mapping. If the whole area is mapped then we're done, - * otherwise remap it since we want to keep the virt addrs for - * each request contiguous. - * - * We make the assumption here that if the bottom and top - * of the range we want are mapped then it's mapped to the - * same virt address (and this is contiguous). 
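
In driver terms, the 32-bit interface implemented here is used as below; the device address and window size are purely illustrative:

	/* Hypothetical user of the ioremap()/iounmap() pair above. */
	void __iomem *regs = ioremap(0xfdf00000, 0x1000);	/* made-up device */
	if (regs) {
		u32 id = in_be32(regs);	/* use MMIO accessors, not plain loads */
		iounmap(regs);
	}
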
- * -- Cort - */ - if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) - goto out; - - if ((v = p_mapped_by_tlbcam(p))) - goto out; - - if (mem_init_done) { - struct vm_struct *area; - area = get_vm_area(size, VM_IOREMAP); - if (area == 0) - return NULL; - v = (unsigned long) area->addr; - } else { - v = (ioremap_bot -= size); - } - - if ((flags & _PAGE_PRESENT) == 0) - flags |= _PAGE_KERNEL; - if (flags & _PAGE_NO_CACHE) - flags |= _PAGE_GUARDED; - - /* - * Should check if it is a candidate for a BAT mapping - */ - - err = 0; - for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_page(v+i, p+i, flags); - if (err) { - if (mem_init_done) - vunmap((void *)v); - return NULL; - } - -out: - return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); -} - -void iounmap(volatile void __iomem *addr) -{ - /* - * If mapped by BATs then there is nothing to do. - * Calling vfree() generates a benign warning. - */ - if (v_mapped_by_bats((unsigned long)addr)) return; - - if (addr > high_memory && (unsigned long) addr < ioremap_bot) - vunmap((void *) (PAGE_MASK & (unsigned long)addr)); -} - -void __iomem *ioport_map(unsigned long port, unsigned int len) -{ - return (void __iomem *) (port + _IO_BASE); -} - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} -EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); - -int -map_page(unsigned long va, phys_addr_t pa, int flags) -{ - pmd_t *pd; - pte_t *pg; - int err = -ENOMEM; - - spin_lock(&init_mm.page_table_lock); - /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pgd_offset_k(va), va); - /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(&init_mm, pd, va); - if (pg != 0) { - err = 0; - set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); - if (mem_init_done) - flush_HPTE(0, va, pmd_val(*pd)); - } - spin_unlock(&init_mm.page_table_lock); - return err; -} - -/* - * Map in all of physical memory starting at KERNELBASE. - */ -void __init mapin_ram(void) -{ - unsigned long v, p, s, f; - - s = mmu_mapin_ram(); - v = KERNELBASE + s; - p = PPC_MEMSTART + s; - for (; s < total_lowmem; s += PAGE_SIZE) { - if ((char *) v >= _stext && (char *) v < etext) - f = _PAGE_RAM_TEXT; - else - f = _PAGE_RAM; - map_page(v, p, f); - v += PAGE_SIZE; - p += PAGE_SIZE; - } -} - -/* is x a power of 2? */ -#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) - -/* is x a power of 4? */ -#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) - -/* - * Set up a mapping for a block of I/O. - * virt, phys, size must all be page-aligned. - * This should only be called before ioremap is called. - */ -void __init io_block_mapping(unsigned long virt, phys_addr_t phys, - unsigned int size, int flags) -{ - int i; - - if (virt > KERNELBASE && virt < ioremap_bot) - ioremap_bot = ioremap_base = virt; - -#ifdef HAVE_BATS - /* - * Use a BAT for this if possible... - */ - if (io_bat_index < 2 && is_power_of_2(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - setbat(io_bat_index, virt, phys, size, flags); - ++io_bat_index; - return; - } -#endif /* HAVE_BATS */ - -#ifdef HAVE_TLBCAM - /* - * Use a CAM for this if possible... - */ - if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - settlbcam(tlbcam_index, virt, phys, size, flags, 0); - ++tlbcam_index; - return; - } -#endif /* HAVE_TLBCAM */ - - /* No BATs available, put it in the page tables. 
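
A quick check of the is_power_of_4() trick above: for a power of two, ffs() returns the 1-based position of the set bit, which is odd exactly when the exponent is even, i.e. for powers of four:

	/* Worked examples for is_power_of_4(). */
	/* 256kB = 0x40000 = 2^18: ffs() == 19, odd  -> 4^9, usable for a CAM. */
	/* 128kB = 0x20000 = 2^17: ffs() == 18, even -> not a power of four.   */
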
*/ - for (i = 0; i < size; i += PAGE_SIZE) - map_page(virt + i, phys + i, flags); -} - -/* Scan the real Linux page tables and return a PTE pointer for - * a virtual address in a context. - * Returns true (1) if PTE was found, zero otherwise. The pointer to - * the PTE pointer is unmodified if PTE is not found. - */ -int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pmd = pmd_offset(pgd, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - /* XXX caller needs to do pte_unmap, yuck */ - } - } - } - return(retval); -} - -/* Find physical address for this virtual address. Normally used by - * I/O functions, but anyone can call it. - */ -unsigned long iopa(unsigned long addr) -{ - unsigned long pa; - - /* I don't know why this won't work on PMacs or CHRP. It - * appears there is some bug, or there is some implicit - * mapping done not properly represented by BATs or in page - * tables.......I am actively working on resolving this, but - * can't hold up other stuff. -- Dan - */ - pte_t *pte; - struct mm_struct *mm; - - /* Check the BATs */ - pa = v_mapped_by_bats(addr); - if (pa) - return pa; - - /* Allow mapping of user addresses (within the thread) - * for DMA if necessary. - */ - if (addr < TASK_SIZE) - mm = current->mm; - else - mm = &init_mm; - - pa = 0; - if (get_pteptr(mm, addr, &pte)) { - pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); - pte_unmap(pte); - } - - return(pa); -} - -/* This is will find the virtual address for a physical one.... - * Swiped from APUS, could be dangerous :-). - * This is only a placeholder until I really find a way to make this - * work. -- Dan - */ -unsigned long -mm_ptov (unsigned long paddr) -{ - unsigned long ret; -#if 0 - if (paddr < 16*1024*1024) - ret = ZTWO_VADDR(paddr); - else { - int i; - - for (i = 0; i < kmap_chunk_count;){ - unsigned long phys = kmap_chunks[i++]; - unsigned long size = kmap_chunks[i++]; - unsigned long virt = kmap_chunks[i++]; - if (paddr >= phys - && paddr < (phys + size)){ - ret = virt + paddr - phys; - goto exit; - } - } - - ret = (unsigned long) __va(paddr); - } -exit: -#ifdef DEBUGPV - printk ("PTOV(%lx)=%lx\n", paddr, ret); -#endif -#else - ret = (unsigned long)paddr + KERNELBASE; -#endif - return ret; -} - diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c deleted file mode 100644 index 724f97e5dee..00000000000 --- a/arch/powerpc/mm/pgtable64.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * This file contains ioremap and related functions for 64-bit machines. - * - * Derived from arch/ppc64/mm/init.c - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
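A usage sketch for get_pteptr() above, making explicit the pte_unmap() obligation its XXX comment complains about (hypothetical caller, illustrative only):

    pte_t *pte;
    unsigned long pa = 0;

    if (get_pteptr(mm, vaddr, &pte)) {
        pa = (pte_val(*pte) & PAGE_MASK) | (vaddr & ~PAGE_MASK);
        pte_unmap(pte);          /* the caller, not get_pteptr(), drops it */
    }

iopa() is exactly this pattern with the BAT lookup tried first.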
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif - -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -#ifdef CONFIG_PPC_ISERIES - -void __iomem *ioremap(unsigned long addr, unsigned long size) -{ - return (void __iomem *)addr; -} - -extern void __iomem *__ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - return (void __iomem *)addr; -} - -void iounmap(volatile void __iomem *addr) -{ - return; -} - -#else - -/* - * map_io_page currently only called by __ioremap - * map_io_page adds an entry to the ioremap page table - * and adds an entry to the HPT, possibly bolting it - */ -static int map_io_page(unsigned long ea, unsigned long pa, int flags) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long vsid; - - if (mem_init_done) { - spin_lock(&init_mm.page_table_lock); - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - ptep = pte_alloc_kernel(&init_mm, pmdp, ea); - if (!ptep) - return -ENOMEM; - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); - spin_unlock(&init_mm.page_table_lock); - } else { - unsigned long va, vpn, hash, hpteg; - - /* - * If the mm subsystem is not fully up, we cannot create a - * linux page table entry for this mapping. Simply bolt an - * entry in the hardware page table. - */ - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0xFFFFFFF); - vpn = va >> PAGE_SHIFT; - - hash = hpt_hash(vpn, 0); - - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - - /* Panic if a pte grpup is full */ - if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, - HPTE_V_BOLTED, - _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) - == -1) { - panic("map_io_page: could not insert mapping"); - } - } - return 0; -} - - -static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, - unsigned long ea, unsigned long size, - unsigned long flags) -{ - unsigned long i; - - if ((flags & _PAGE_PRESENT) == 0) - flags |= pgprot_val(PAGE_KERNEL); - - for (i = 0; i < size; i += PAGE_SIZE) - if (map_io_page(ea+i, pa+i, flags)) - return NULL; - - return (void __iomem *) (ea + (addr & ~PAGE_MASK)); -} - - -void __iomem * -ioremap(unsigned long addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); -} - -void __iomem * __ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - unsigned long pa, ea; - void __iomem *ret; - - /* - * Choose an address to map it to. 
- * Once the imalloc system is running, we use it. - * Before that, we map using addresses going - * up from ioremap_bot. imalloc will use - * the addresses from ioremap_bot through - * IMALLOC_END - * - */ - pa = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - pa; - - if (size == 0) - return NULL; - - if (mem_init_done) { - struct vm_struct *area; - area = im_get_free_area(size); - if (area == NULL) - return NULL; - ea = (unsigned long)(area->addr); - ret = __ioremap_com(addr, pa, ea, size, flags); - if (!ret) - im_free(area->addr); - } else { - ea = ioremap_bot; - ret = __ioremap_com(addr, pa, ea, size, flags); - if (ret) - ioremap_bot += size; - } - return ret; -} - -#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) - -int __ioremap_explicit(unsigned long pa, unsigned long ea, - unsigned long size, unsigned long flags) -{ - struct vm_struct *area; - void __iomem *ret; - - /* For now, require page-aligned values for pa, ea, and size */ - if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || - !IS_PAGE_ALIGNED(size)) { - printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); - return 1; - } - - if (!mem_init_done) { - /* Two things to consider in this case: - * 1) No records will be kept (imalloc, etc) that the region - * has been remapped - * 2) It won't be easy to iounmap() the region later (because - * of 1) - */ - ; - } else { - area = im_get_area(ea, size, - IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); - if (area == NULL) { - /* Expected when PHB-dlpar is in play */ - return 1; - } - if (ea != (unsigned long) area->addr) { - printk(KERN_ERR "unexpected addr return from " - "im_get_area\n"); - return 1; - } - } - - ret = __ioremap_com(pa, pa, ea, size, flags); - if (ret == NULL) { - printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); - return 1; - } - if (ret != (void *) ea) { - printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); - return 1; - } - - return 0; -} - -/* - * Unmap an IO region and remove it from imalloc'd list. - * Access to IO memory should be serialized by driver. - * This code is modeled after vmalloc code - unmap_vm_area() - * - * XXX what about calls before mem_init_done (ie python_countermeasures()) - */ -void iounmap(volatile void __iomem *token) -{ - void *addr; - - if (!mem_init_done) - return; - - addr = (void *) ((unsigned long __force) token & PAGE_MASK); - - im_free(addr); -} - -static int iounmap_subset_regions(unsigned long addr, unsigned long size) -{ - struct vm_struct *area; - - /* Check whether subsets of this region exist */ - area = im_get_area(addr, size, IM_REGION_SUPERSET); - if (area == NULL) - return 1; - - while (area) { - iounmap((void __iomem *) area->addr); - area = im_get_area(addr, size, - IM_REGION_SUPERSET); - } - - return 0; -} - -int iounmap_explicit(volatile void __iomem *start, unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - int rc; - - addr = (unsigned long __force) start & PAGE_MASK; - - /* Verify that the region either exists or is a subset of an existing - * region. In the latter case, split the parent region to create - * the exact region - */ - area = im_get_area(addr, size, - IM_REGION_EXISTS | IM_REGION_SUBSET); - if (area == NULL) { - /* Determine whether subset regions exist. If so, unmap */ - rc = iounmap_subset_regions(addr, size); - if (rc) { - printk(KERN_ERR - "%s() cannot unmap nonexistent range 0x%lx\n", - __FUNCTION__, addr); - return 1; - } - } else { - iounmap((void __iomem *) area->addr); - } - /* - * FIXME! 
This can't be right: - iounmap(area->addr); - * Maybe it should be "iounmap(area);" - */ - return 0; -} - -#endif - -EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c new file mode 100644 index 00000000000..5792e533916 --- /dev/null +++ b/arch/powerpc/mm/pgtable_32.c @@ -0,0 +1,469 @@ +/* + * This file contains the routines setting up the linux page tables. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mmu_decl.h" + +unsigned long ioremap_base; +unsigned long ioremap_bot; +int io_bat_index; + +#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#define HAVE_BATS 1 +#endif + +#if defined(CONFIG_FSL_BOOKE) +#define HAVE_TLBCAM 1 +#endif + +extern char etext[], _stext[]; + +#ifdef CONFIG_SMP +extern void hash_page_sync(void); +#endif + +#ifdef HAVE_BATS +extern unsigned long v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(unsigned long pa); +void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); + +#else /* !HAVE_BATS */ +#define v_mapped_by_bats(x) (0UL) +#define p_mapped_by_bats(x) (0UL) +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM +extern unsigned int tlbcam_index; +extern unsigned long v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +#else /* !HAVE_TLBCAM */ +#define v_mapped_by_tlbcam(x) (0UL) +#define p_mapped_by_tlbcam(x) (0UL) +#endif /* HAVE_TLBCAM */ + +#ifdef CONFIG_PTE_64BIT +/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. 
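The arithmetic behind that pgdir comment (assuming 4 KB pages and 4-byte pgd pointers): with 8-byte PTEs a page holds only 512 of them, so each pgd entry spans 2 MB and a 32-bit space needs 2048 pgd entries; at 4 bytes apiece that is 8 kB, hence the order-1 allocation below. A worked check:

    unsigned int entries = 1u << (32 - (12 + 9)); /* 2048 entries, 2 MB each */
    unsigned int bytes   = entries * 4;           /* 8192 -> two pages       */

With 4-byte PTEs the same sums give 1024 entries covering 4 MB each, which fits in one page (order 0).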
*/ +#define PGDIR_ORDER 1 +#else +#define PGDIR_ORDER 0 +#endif + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + return ret; +} + +void pgd_free(pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGDIR_ORDER); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +void pte_free_kernel(pte_t *pte) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + free_page((unsigned long)pte); +} + +void pte_free(struct page *ptepage) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + __free_page(ptepage); +} + +#ifndef CONFIG_PHYS_64BIT +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +#else /* CONFIG_PHYS_64BIT */ +void __iomem * +ioremap64(unsigned long long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + phys_addr_t addr64 = fixup_bigphys_addr(addr, size); + + return ioremap64(addr64, size); +} +#endif /* CONFIG_PHYS_64BIT */ + +void __iomem * +__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16*1024*1024) + p += _ISA_MEM_BASE; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if (mem_init_done && (p < virt_to_phys(high_memory))) { + printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * BAT mapping. If the whole area is mapped then we're done, + * otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). 
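The comment above describes two allocation regimes for __ioremap(), and they behave quite differently: before mem_init(), virtual space is carved downward from ioremap_base by the bump allocator `v = (ioremap_bot -= size)` in the code just below, and such mappings are permanent; afterwards, get_vm_area(VM_IOREMAP) hands out tracked regions that iounmap() can later vunmap(). A hypothetical early use (BOARD_UART_PHYS is a made-up, board-specific constant):

    /* before mem_init(): permanent, ioremap_bot only ever moves down */
    void __iomem *uart = ioremap(BOARD_UART_PHYS, 0x1000);

    /* after mem_init() the same call is backed by get_vm_area()
     * and can be undone with iounmap(uart) */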
+ * -- Cort + */ + if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) + goto out; + + if ((v = p_mapped_by_tlbcam(p))) + goto out; + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == 0) + return NULL; + v = (unsigned long) area->addr; + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v+i, p+i, flags); + if (err) { + if (mem_init_done) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. + * Calling vfree() generates a benign warning. + */ + if (v_mapped_by_bats((unsigned long)addr)) return; + + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vunmap((void *) (PAGE_MASK & (unsigned long)addr)); +} + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +int +map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + + spin_lock(&init_mm.page_table_lock); + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(&init_mm, pd, va); + if (pg != 0) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + } + spin_unlock(&init_mm.page_table_lock); + return err; +} + +/* + * Map in all of physical memory starting at KERNELBASE. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + s = mmu_mapin_ram(); + v = KERNELBASE + s; + p = PPC_MEMSTART + s; + for (; s < total_lowmem; s += PAGE_SIZE) { + if ((char *) v >= _stext && (char *) v < etext) + f = _PAGE_RAM_TEXT; + else + f = _PAGE_RAM; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* is x a power of 4? */ +#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > KERNELBASE && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + +#ifdef HAVE_BATS + /* + * Use a BAT for this if possible... + */ + if (io_bat_index < 2 && is_power_of_2(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + setbat(io_bat_index, virt, phys, size, flags); + ++io_bat_index; + return; + } +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM + /* + * Use a CAM for this if possible... + */ + if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + settlbcam(tlbcam_index, virt, phys, size, flags, 0); + ++tlbcam_index; + return; + } +#endif /* HAVE_TLBCAM */ + + /* No BATs available, put it in the page tables. 
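The is_power_of_4() macro above merits a note: a power of 4 is a power of 2 with an even exponent, and for x = 2^n, ffs(x) returns n + 1, so `ffs(x) & 1` is set exactly when n is even. Worked values (illustrative):

    is_power_of_4(0x1000000);   /* 2^24: ffs = 25, odd  -> true  */
    is_power_of_4(0x2000000);   /* 2^25: ffs = 26, even -> false */

It gates the settlbcam() path above, which only accepts power-of-4 block sizes.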
*/ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +int +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } + return(retval); +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + /* I don't know why this won't work on PMacs or CHRP. It + * appears there is some bug, or there is some implicit + * mapping done not properly represented by BATs or in page + * tables.......I am actively working on resolving this, but + * can't hold up other stuff. -- Dan + */ + pte_t *pte; + struct mm_struct *mm; + + /* Check the BATs */ + pa = v_mapped_by_bats(addr); + if (pa) + return pa; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) { + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + pte_unmap(pte); + } + + return(pa); +} + +/* This is will find the virtual address for a physical one.... + * Swiped from APUS, could be dangerous :-). + * This is only a placeholder until I really find a way to make this + * work. -- Dan + */ +unsigned long +mm_ptov (unsigned long paddr) +{ + unsigned long ret; +#if 0 + if (paddr < 16*1024*1024) + ret = ZTWO_VADDR(paddr); + else { + int i; + + for (i = 0; i < kmap_chunk_count;){ + unsigned long phys = kmap_chunks[i++]; + unsigned long size = kmap_chunks[i++]; + unsigned long virt = kmap_chunks[i++]; + if (paddr >= phys + && paddr < (phys + size)){ + ret = virt + paddr - phys; + goto exit; + } + } + + ret = (unsigned long) __va(paddr); + } +exit: +#ifdef DEBUGPV + printk ("PTOV(%lx)=%lx\n", paddr, ret); +#endif +#else + ret = (unsigned long)paddr + KERNELBASE; +#endif + return ret; +} + diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c new file mode 100644 index 00000000000..724f97e5dee --- /dev/null +++ b/arch/powerpc/mm/pgtable_64.c @@ -0,0 +1,357 @@ +/* + * This file contains ioremap and related functions for 64-bit machines. + * + * Derived from arch/ppc64/mm/init.c + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
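One step in map_io_page() below deserves expansion: before the mm subsystem is up there is no Linux page table to write, so the mapping is bolted directly into the hash page table. The effective-to-virtual translation it performs is (a sketch of the !mem_init_done branch, same names as the code):

    vsid  = get_kernel_vsid(ea);               /* segment -> virtual segment id */
    va    = (vsid << 28) | (ea & 0xFFFFFFF);   /* 256 MB segment + offset       */
    vpn   = va >> PAGE_SHIFT;                  /* virtual page number           */
    hash  = hpt_hash(vpn, 0);
    hpteg = (hash & htab_hash_mask) * HPTES_PER_GROUP;

Each group holds HPTES_PER_GROUP slots; ppc_md.hpte_insert() returning -1 means the group is full, which is fatal for a bolted entry, hence the panic().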
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static int map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + if (!ptep) + return -ENOMEM; + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + spin_unlock(&init_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { + panic("map_io_page: could not insert mapping"); + } + } + return 0; +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + + for (i = 0; i < size; i += PAGE_SIZE) + if (map_io_page(ea+i, pa+i, flags)) + return NULL; + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + unsigned long pa, ea; + void __iomem *ret; + + /* + * Choose an address to map it to. 
+ * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); + } else { + ea = ioremap_bot; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; + } + return ret; +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + void __iomem *ret; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from " + "im_get_area\n"); + return 1; + } + } + + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!mem_init_done) + return; + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + im_free(addr); +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! 
This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c deleted file mode 100644 index cef9e83cc7e..00000000000 --- a/arch/powerpc/mm/ppc_mmu.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU substantially follows the - * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "mmu_decl.h" - -PTE *Hash, *Hash_end; -unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; - -union ubat { /* BAT register values to be loaded */ - BAT bat; -#ifdef CONFIG_PPC64BRIDGE - u64 word[2]; -#else - u32 word[2]; -#endif -} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ - -struct batrange { /* stores address ranges mapped by BATs */ - unsigned long start; - unsigned long limit; - unsigned long phys; -} bat_addrs[4]; - -/* - * Return PA for this VA if it is mapped by a BAT, or 0 - */ -unsigned long v_mapped_by_bats(unsigned long va) -{ - int b; - for (b = 0; b < 4; ++b) - if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) - return bat_addrs[b].phys + (va - bat_addrs[b].start); - return 0; -} - -/* - * Return VA for a given PA or 0 if not mapped - */ -unsigned long p_mapped_by_bats(unsigned long pa) -{ - int b; - for (b = 0; b < 4; ++b) - if (pa >= bat_addrs[b].phys - && pa < (bat_addrs[b].limit-bat_addrs[b].start) - +bat_addrs[b].phys) - return bat_addrs[b].start+(pa-bat_addrs[b].phys); - return 0; -} - -unsigned long __init mmu_mapin_ram(void) -{ -#ifdef CONFIG_POWER4 - return 0; -#else - unsigned long tot, bl, done; - unsigned long max_size = (256<<20); - unsigned long align; - - if (__map_without_bats) - return 0; - - /* Set up BAT2 and if necessary BAT3 to cover RAM. */ - - /* Make sure we don't map a block larger than the - smallest alignment of the physical address. 
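A worked trace of the BAT sizing below (illustrative numbers, PPC_MEMSTART = 0): with total_lowmem = 192 MB, bl doubles from 128 KB while bl * 2 <= tot, so it stops at 128 MB and BAT2 maps 128 MB at KERNELBASE; the remaining 64 MB then goes through the same loop for BAT3:

    tot = 192 << 20;
    for (bl = 128 << 10; bl < max_size; bl <<= 1)
        if (bl * 2 > tot)
            break;              /* stops with bl = 128 MB for BAT2 */
    /* second pass, tot = 64 MB: stops with bl = 64 MB for BAT3    */

Anything the two BATs cannot cover is returned as unmapped, and mapin_ram() later puts it in the page tables.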
*/ - /* alignment of PPC_MEMSTART */ - align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; - /* set BAT block size to MIN(max_size, align) */ - if (align && align < max_size) - max_size = align; - - tot = total_lowmem; - for (bl = 128<<10; bl < max_size; bl <<= 1) { - if (bl * 2 > tot) - break; - } - - setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; - if ((done < tot) && !bat_addrs[3].limit) { - /* use BAT3 to cover a bit more */ - tot -= done; - for (bl = 128<<10; bl < max_size; bl <<= 1) - if (bl * 2 > tot) - break; - setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; - } - - return done; -#endif -} - -/* - * Set up one of the I/D BAT (block address translation) register pairs. - * The parameters are not checked; in particular size must be a power - * of 2 between 128k and 256M. - */ -void __init setbat(int index, unsigned long virt, unsigned long phys, - unsigned int size, int flags) -{ - unsigned int bl; - int wimgxpp; - union ubat *bat = BATS[index]; - - if (((flags & _PAGE_NO_CACHE) == 0) && - cpu_has_feature(CPU_FTR_NEED_COHERENT)) - flags |= _PAGE_COHERENT; - - bl = (size >> 17) - 1; - if (PVR_VER(mfspr(SPRN_PVR)) != 1) { - /* 603, 604, etc. */ - /* Do DBAT first */ - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].word[1] = phys | wimgxpp; -#ifndef CONFIG_KGDB /* want user access for breakpoints */ - if (flags & _PAGE_USER) -#endif - bat[1].bat.batu.vp = 1; - if (flags & _PAGE_GUARDED) { - /* G bit must be zero in IBATs */ - bat[0].word[0] = bat[0].word[1] = 0; - } else { - /* make IBAT same as DBAT */ - bat[0] = bat[1]; - } - } else { - /* 601 cpu */ - if (bl > BL_8M) - bl = BL_8M; - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT); - wimgxpp |= (flags & _PAGE_RW)? - ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->word[1] = phys | bl | 0x40; /* V=1 */ - } - - bat_addrs[index].start = virt; - bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; - bat_addrs[index].phys = phys; -} - -/* - * Initialize the hash table and patch the instructions in hashtable.S. - */ -void __init MMU_init_hw(void) -{ - unsigned int hmask, mb, mb2; - unsigned int n_hpteg, lg_n_hpteg; - - extern unsigned int hash_page_patch_A[]; - extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; - extern unsigned int hash_page[]; - extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; - - if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { - /* - * Put a blr (procedure return) instruction at the - * start of hash_page, since we can still get DSI - * exceptions on a 603. - */ - hash_page[0] = 0x4e800020; - flush_icache_range((unsigned long) &hash_page[0], - (unsigned long) &hash_page[1]); - return; - } - - if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); - -#ifdef CONFIG_PPC64BRIDGE -#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ -#define SDR1_LOW_BITS (lg_n_hpteg - 11) -#define MIN_N_HPTEG 2048 /* min 256kB hash table */ -#else -#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ -#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) -#define MIN_N_HPTEG 1024 /* min 64kB hash table */ -#endif - - /* - * Allow 1 HPTE (1/8 HPTEG) for each page of memory. - * This is less than the recommended amount, but then - * Linux ain't AIX. 
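To make the sizing below concrete (a worked example, non-PPC64BRIDGE case): with 512 MB of RAM and 4 KB pages,

    n_hpteg   = (512u << 20) / (4096 * 8);  /* 16384 groups, already 2^n */
    Hash_size = n_hpteg << 6;               /* LG_HPTEG_SIZE = 6 -> 1 MB */

i.e. about 2 kB of hash table per MB of memory, which is the "less than the recommended amount" the comment owns up to.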
- */ - n_hpteg = total_memory / (PAGE_SIZE * 8); - if (n_hpteg < MIN_N_HPTEG) - n_hpteg = MIN_N_HPTEG; - lg_n_hpteg = __ilog2(n_hpteg); - if (n_hpteg & (n_hpteg - 1)) { - ++lg_n_hpteg; /* round up if not power of 2 */ - n_hpteg = 1 << lg_n_hpteg; - } - Hash_size = n_hpteg << LG_HPTEG_SIZE; - - /* - * Find some memory for the hash table. - */ - if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(lmb_alloc_base(Hash_size, Hash_size, - __initial_memory_limit)); - cacheable_memzero(Hash, Hash_size); - _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - - Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); - - printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", - total_memory >> 20, Hash_size >> 10, Hash); - - - /* - * Patch up the instructions in hashtable.S:create_hpte - */ - if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); - Hash_mask = n_hpteg - 1; - hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); - mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; - if (lg_n_hpteg > 16) - mb2 = 16 - LG_HPTEG_SIZE; - - hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); - hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); - hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; - hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; - - /* - * Ensure that the locations we've patched have been written - * out from the data cache and invalidated in the instruction - * cache, on those machines with split caches. - */ - flush_icache_range((unsigned long) &hash_page_patch_A[0], - (unsigned long) &hash_page_patch_C[1]); - - /* - * Patch up the instructions in hashtable.S:flush_hash_page - */ - flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); - flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); - flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; - flush_icache_range((unsigned long) &flush_hash_patch_A[0], - (unsigned long) &flush_hash_patch_B[1]); - - if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); -} diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c new file mode 100644 index 00000000000..cef9e83cc7e --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -0,0 +1,285 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
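A note on the patching above: the masks show what is rewritten in place. `(insn & ~0xffff) | (Hash >> 16)` swaps the 16-bit immediate of a load-address instruction for the high half of the table's address, and `(insn & ~0x7c0) | (mb << 6)` rewrites a 5-bit field (by its position, the MB mask field of an rlwinm-class instruction). The idiom, on an illustrative encoding:

    unsigned int insn = 0x3c800000;            /* lis r4,0      */
    insn = (insn & ~0xffffu) | (hash >> 16);   /* lis r4,hash@h */

The flush_icache_range() calls are what make the patched words visible to instruction fetch on split-cache parts.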
+ * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mmu_decl.h" + +PTE *Hash, *Hash_end; +unsigned long Hash_size, Hash_mask; +unsigned long _SDR1; + +union ubat { /* BAT register values to be loaded */ + BAT bat; +#ifdef CONFIG_PPC64BRIDGE + u64 word[2]; +#else + u32 word[2]; +#endif +} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ + +struct batrange { /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} bat_addrs[4]; + +/* + * Return PA for this VA if it is mapped by a BAT, or 0 + */ +unsigned long v_mapped_by_bats(unsigned long va) +{ + int b; + for (b = 0; b < 4; ++b) + if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) + return bat_addrs[b].phys + (va - bat_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_bats(unsigned long pa) +{ + int b; + for (b = 0; b < 4; ++b) + if (pa >= bat_addrs[b].phys + && pa < (bat_addrs[b].limit-bat_addrs[b].start) + +bat_addrs[b].phys) + return bat_addrs[b].start+(pa-bat_addrs[b].phys); + return 0; +} + +unsigned long __init mmu_mapin_ram(void) +{ +#ifdef CONFIG_POWER4 + return 0; +#else + unsigned long tot, bl, done; + unsigned long max_size = (256<<20); + unsigned long align; + + if (__map_without_bats) + return 0; + + /* Set up BAT2 and if necessary BAT3 to cover RAM. */ + + /* Make sure we don't map a block larger than the + smallest alignment of the physical address. */ + /* alignment of PPC_MEMSTART */ + align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; + /* set BAT block size to MIN(max_size, align) */ + if (align && align < max_size) + max_size = align; + + tot = total_lowmem; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > tot) + break; + } + + setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + if ((done < tot) && !bat_addrs[3].limit) { + /* use BAT3 to cover a bit more */ + tot -= done; + for (bl = 128<<10; bl < max_size; bl <<= 1) + if (bl * 2 > tot) + break; + setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + } + + return done; +#endif +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + */ +void __init setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned int bl; + int wimgxpp; + union ubat *bat = BATS[index]; + + if (((flags & _PAGE_NO_CACHE) == 0) && + cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags |= _PAGE_COHERENT; + + bl = (size >> 17) - 1; + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, etc. */ + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].word[1] = phys | wimgxpp; +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) +#endif + bat[1].bat.batu.vp = 1; + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + bat[0].word[0] = bat[0].word[1] = 0; + } else { + /* make IBAT same as DBAT */ + bat[0] = bat[1]; + } + } else { + /* 601 cpu */ + if (bl > BL_8M) + bl = BL_8M; + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT); + wimgxpp |= (flags & _PAGE_RW)? 
+ ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; + bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->word[1] = phys | bl | 0x40; /* V=1 */ + } + + bat_addrs[index].start = virt; + bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[index].phys = phys; +} + +/* + * Initialize the hash table and patch the instructions in hashtable.S. + */ +void __init MMU_init_hw(void) +{ + unsigned int hmask, mb, mb2; + unsigned int n_hpteg, lg_n_hpteg; + + extern unsigned int hash_page_patch_A[]; + extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; + extern unsigned int hash_page[]; + extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; + + if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + /* + * Put a blr (procedure return) instruction at the + * start of hash_page, since we can still get DSI + * exceptions on a 603. + */ + hash_page[0] = 0x4e800020; + flush_icache_range((unsigned long) &hash_page[0], + (unsigned long) &hash_page[1]); + return; + } + + if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); + +#ifdef CONFIG_PPC64BRIDGE +#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ +#define SDR1_LOW_BITS (lg_n_hpteg - 11) +#define MIN_N_HPTEG 2048 /* min 256kB hash table */ +#else +#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ +#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) +#define MIN_N_HPTEG 1024 /* min 64kB hash table */ +#endif + + /* + * Allow 1 HPTE (1/8 HPTEG) for each page of memory. + * This is less than the recommended amount, but then + * Linux ain't AIX. + */ + n_hpteg = total_memory / (PAGE_SIZE * 8); + if (n_hpteg < MIN_N_HPTEG) + n_hpteg = MIN_N_HPTEG; + lg_n_hpteg = __ilog2(n_hpteg); + if (n_hpteg & (n_hpteg - 1)) { + ++lg_n_hpteg; /* round up if not power of 2 */ + n_hpteg = 1 << lg_n_hpteg; + } + Hash_size = n_hpteg << LG_HPTEG_SIZE; + + /* + * Find some memory for the hash table. + */ + if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); + Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + __initial_memory_limit)); + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; + + Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); + + + /* + * Patch up the instructions in hashtable.S:create_hpte + */ + if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); + Hash_mask = n_hpteg - 1; + hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + mb2 = 16 - LG_HPTEG_SIZE; + + hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); + hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); + hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; + hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; + + /* + * Ensure that the locations we've patched have been written + * out from the data cache and invalidated in the instruction + * cache, on those machines with split caches. 
+ */ + flush_icache_range((unsigned long) &hash_page_patch_A[0], + (unsigned long) &hash_page_patch_C[1]); + + /* + * Patch up the instructions in hashtable.S:flush_hash_page + */ + flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); + flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); + flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; + flush_icache_range((unsigned long) &flush_hash_patch_A[0], + (unsigned long) &flush_hash_patch_B[1]); + + if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); +} diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c deleted file mode 100644 index 6c3dc3c44c8..00000000000 --- a/arch/powerpc/mm/tlb.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * This file contains the routines for TLB flushing. - * On machines where the MMU uses a hash table to store virtual to - * physical translations, these routines flush entries from the - * hash table also. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "mmu_decl.h" - -/* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash != 0) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context, addr, ptephys, 1); - } -} - -/* - * Called by ptep_set_access_flags, must flush on CPUs for which the - * DSI handler can't just "fixup" the TLB on a write fault - */ -void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) -{ - if (Hash != 0) - return; - _tlbie(addr); -} - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (Hash == 0) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* - * TLB flushing: - * - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes kernel pages - * - * since the hardware hash table functions as an extension of the - * tlb as far as the linux tables are concerned, flush it too. - * -- Cort - */ - -/* - * 750 SMP is a Bad Idea because the 750 doesn't broadcast all - * the cache operations on the bus. Hence we need to use an IPI - * to get the other CPU(s) to invalidate their TLBs. 
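The split that runs through this whole file is worth stating once: on hash-table parts (Hash != 0) the TLB can be silently refilled from the hash table, so stale HPTEs must be destroyed, not just TLB entries; on 603-class parts (Hash == 0) the TLB is the only translation state and tlbia/tlbie suffices. The recurring shape, paraphrasing the functions in this file:

    if (Hash == 0) {
        _tlbia();       /* no hash table: the TLB is all there is */
        return;
    }
    /* hash parts: destroy the HPTEs so they cannot refill the TLB */
    flush_hash_pages(ctx, start, pmd_val(*pmd), count);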
- */ -#ifdef CONFIG_SMP_750 -#define FINISH_FLUSH smp_send_tlb_invalidate(0) -#else -#define FINISH_FLUSH do { } while (0) -#endif - -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - pmd_t *pmd; - unsigned long pmd_end; - int count; - unsigned int ctx = mm->context; - - if (Hash == 0) { - _tlbia(); - return; - } - start &= PAGE_MASK; - if (start >= end) - return; - end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pgd_offset(mm, start), start); - for (;;) { - pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; - if (pmd_end > end) - pmd_end = end; - if (!pmd_none(*pmd)) { - count = ((pmd_end - start) >> PAGE_SHIFT) + 1; - flush_hash_pages(ctx, start, pmd_val(*pmd), count); - } - if (pmd_end == end) - break; - start = pmd_end + 1; - ++pmd; - } -} - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); - FINISH_FLUSH; -} - -/* - * Flush all the (user) entries for the address space described by mm. - */ -void flush_tlb_mm(struct mm_struct *mm) -{ - struct vm_area_struct *mp; - - if (Hash == 0) { - _tlbia(); - return; - } - - for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); - FINISH_FLUSH; -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ - struct mm_struct *mm; - pmd_t *pmd; - - if (Hash == 0) { - _tlbie(vmaddr); - return; - } - mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); - if (!pmd_none(*pmd)) - flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); - FINISH_FLUSH; -} - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); - FINISH_FLUSH; -} diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c new file mode 100644 index 00000000000..6c3dc3c44c8 --- /dev/null +++ b/arch/powerpc/mm/tlb_32.c @@ -0,0 +1,183 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. 
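A worked trace of flush_range() above (illustrative addresses, 4 KB pages, 4 MB of coverage per pmd): a caller passing start = 0x103fe000, end = 0x10402000 spans a pmd boundary, so two passes result:

    /* pass 1: pmd_end = 0x103fffff, count = ((pmd_end - start) >> 12) + 1 = 2 */
    /* pass 2: start = 0x10400000, pmd_end clamped to 0x10401fff, count = 2   */

four pages in all, each batch handed to flush_hash_pages() together with the pmd that maps it.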
+ */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context, addr, ptephys, 1); + } +} + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pgd_offset(mm, start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *mp; + + if (Hash == 0) { + _tlbia(); + return; + } + + for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + FINISH_FLUSH; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct mm_struct *mm; + pmd_t *pmd; + + if (Hash == 0) { + _tlbie(vmaddr); + return; + } + mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; + pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + if (!pmd_none(*pmd)) + flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + FINISH_FLUSH; +} + +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. 
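flush_tlb_mm() above takes the coarse route on hash parts: rather than remember which pages were ever hashed, it walks every VMA and lets flush_hash_pages() skip entries without _PAGE_HASHPTE. The resulting call path for a whole address space (a sketch using the functions above):

    flush_tlb_mm(mm)
        -> flush_range(mm, vma->vm_start, vma->vm_end)   /* per VMA     */
            -> flush_hash_pages(mm->context, start,
                                pmd_val(*pmd), count)    /* per pmd run */

On Hash == 0 parts the whole thing collapses to a single _tlbia().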
+ */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_range(vma->vm_mm, start, end); + FINISH_FLUSH; +} -- cgit v1.2.3-70-g09d2 From ab1f9dac6eea25ee59e4c8e1cf0b7476afbbfe07 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 10 Oct 2005 21:58:35 +1000 Subject: powerpc: Merge arch/ppc64/mm to arch/powerpc/mm This moves the remaining files in arch/ppc64/mm to arch/powerpc/mm, and arranges that we use them when compiling with ARCH=ppc64. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 8 +- arch/powerpc/mm/hash_low_64.S | 288 +++++++++++++ arch/powerpc/mm/hash_native_64.c | 446 ++++++++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 438 ++++++++++++++++++++ arch/powerpc/mm/hugetlbpage.c | 745 +++++++++++++++++++++++++++++++++ arch/powerpc/mm/imalloc.c | 317 ++++++++++++++ arch/powerpc/mm/init_64.c | 36 -- arch/powerpc/mm/mem.c | 11 +- arch/powerpc/mm/mmap.c | 86 ++++ arch/powerpc/mm/mmu_decl.h | 18 +- arch/powerpc/mm/numa.c | 779 +++++++++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable_64.c | 34 +- arch/powerpc/mm/slb.c | 158 +++++++ arch/powerpc/mm/slb_low.S | 151 +++++++ arch/powerpc/mm/stab.c | 279 +++++++++++++ arch/powerpc/mm/tlb_64.c | 196 +++++++++ arch/ppc64/Makefile | 2 +- arch/ppc64/mm/Makefile | 11 - arch/ppc64/mm/fault.c | 333 --------------- arch/ppc64/mm/hash_low.S | 288 ------------- arch/ppc64/mm/hash_native.c | 446 -------------------- arch/ppc64/mm/hash_utils.c | 438 -------------------- arch/ppc64/mm/hugetlbpage.c | 745 --------------------------------- arch/ppc64/mm/imalloc.c | 317 -------------- arch/ppc64/mm/init.c | 870 --------------------------------------- arch/ppc64/mm/mmap.c | 86 ---- arch/ppc64/mm/numa.c | 779 ----------------------------------- arch/ppc64/mm/slb.c | 158 ------- arch/ppc64/mm/slb_low.S | 151 ------- arch/ppc64/mm/stab.c | 279 ------------- arch/ppc64/mm/tlb.c | 196 --------- 31 files changed, 3920 insertions(+), 5169 deletions(-) create mode 100644 arch/powerpc/mm/hash_low_64.S create mode 100644 arch/powerpc/mm/hash_native_64.c create mode 100644 arch/powerpc/mm/hash_utils_64.c create mode 100644 arch/powerpc/mm/hugetlbpage.c create mode 100644 arch/powerpc/mm/imalloc.c create mode 100644 arch/powerpc/mm/mmap.c create mode 100644 arch/powerpc/mm/numa.c create mode 100644 arch/powerpc/mm/slb.c create mode 100644 arch/powerpc/mm/slb_low.S create mode 100644 arch/powerpc/mm/stab.c create mode 100644 arch/powerpc/mm/tlb_64.c delete mode 100644 arch/ppc64/mm/Makefile delete mode 100644 arch/ppc64/mm/fault.c delete mode 100644 arch/ppc64/mm/hash_low.S delete mode 100644 arch/ppc64/mm/hash_native.c delete mode 100644 arch/ppc64/mm/hash_utils.c delete mode 100644 arch/ppc64/mm/hugetlbpage.c delete mode 100644 arch/ppc64/mm/imalloc.c delete mode 100644 arch/ppc64/mm/init.c delete mode 100644 arch/ppc64/mm/mmap.c delete mode 100644 arch/ppc64/mm/numa.c delete mode 100644 arch/ppc64/mm/slb.c delete mode 100644 arch/ppc64/mm/slb_low.S delete mode 100644 arch/ppc64/mm/stab.c delete mode 100644 arch/ppc64/mm/tlb.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 35497deeb4b..612bc4ec72b 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,8 +5,14 @@ obj-y := fault.o mem.o lmb.o obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ tlb_32.o -obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o +hash-$(CONFIG_PPC_MULTIPLATFORM) := hash_native_64.o +obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o 
mmu_context_64.o \ + hash_utils_64.o hash_low_64.o tlb_64.o \ + slb_low.o slb.o stab.o mmap.o imalloc.o \ + $(hash-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S new file mode 100644 index 00000000000..d6ed9102eee --- /dev/null +++ b/arch/powerpc/mm/hash_low_64.S @@ -0,0 +1,288 @@ +/* + * ppc64 MMU hashtable management routines + * + * (c) Copyright IBM Corp. 2003 + * + * Maintained by: Benjamin Herrenschmidt + * + * + * This file is covered by the GNU Public Licence v2 as + * described in the kernel's COPYING file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + .text + +/* + * Stackframe: + * + * +-> Back chain (SP + 256) + * | General register save area (SP + 112) + * | Parameter save area (SP + 48) + * | TOC save area (SP + 40) + * | link editor doubleword (SP + 32) + * | compiler doubleword (SP + 24) + * | LR save area (SP + 16) + * | CR save area (SP + 8) + * SP ---> +-- Back chain (SP + 0) + */ +#define STACKFRAMESIZE 256 + +/* Save parameters offsets */ +#define STK_PARM(i) (STACKFRAMESIZE + 48 + ((i)-3)*8) + +/* Save non-volatile offsets */ +#define STK_REG(i) (112 + ((i)-14)*8) + +/* + * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, + * pte_t *ptep, unsigned long trap, int local) + * + * Adds a page to the hash table. This is the non-LPAR version for now + */ + +_GLOBAL(__hash_page) + mflr r0 + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + /* Save all params that we need after a function call */ + std r6,STK_PARM(r6)(r1) + std r8,STK_PARM(r8)(r1) + + /* Add _PAGE_PRESENT to access */ + ori r4,r4,_PAGE_PRESENT + + /* Save non-volatile registers. + * r31 will hold "old PTE" + * r30 is "new PTE" + * r29 is "va" + * r28 is a hash value + * r27 is hashtab mask (maybe dynamic patched instead ?) + */ + std r27,STK_REG(r27)(r1) + std r28,STK_REG(r28)(r1) + std r29,STK_REG(r29)(r1) + std r30,STK_REG(r30)(r1) + std r31,STK_REG(r31)(r1) + + /* Step 1: + * + * Check permissions, atomically mark the linux PTE busy + * and hashed. + */ +1: + ldarx r31,0,r6 + /* Check access rights (access & ~(pte_val(*ptep))) */ + andc. r0,r4,r31 + bne- htab_wrong_access + /* Check if PTE is busy */ + andi. r0,r31,_PAGE_BUSY + /* If so, just bail out and refault if needed. Someone else + * is changing this PTE anyway and might hash it. + */ + bne- bail_ok + /* Prepare new PTE value (turn access RW into DIRTY, then + * add BUSY,HASHPTE and ACCESSED) + */ + rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ + or r30,r30,r31 + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + /* Write the linux PTE atomically (setting busy) */ + stdcx. r30,0,r6 + bne- 1b + isync + + /* Step 2: + * + * Insert/Update the HPTE in the hash table. At this point, + * r4 (access) is re-useable, we use it for the new HPTE flags + */ + + /* Calc va and put it in r29 */ + rldicr r29,r5,28,63-28 + rldicl r3,r3,0,36 + or r29,r3,r29 + + /* Calculate hash value for primary slot and store it in r28 */ + rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ + rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ + xor r28,r5,r0 + + /* Convert linux PTE bits into HW equivalents */ + andi. 
r3,r30,0x1fe		/* Get basic set of flags */
+	xori	r3,r3,HW_NO_EXEC	/* _PAGE_EXEC -> NOEXEC */
+	rlwinm	r0,r30,32-9+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
+	rlwinm	r4,r30,32-7+1,30,30	/* _PAGE_DIRTY -> _PAGE_USER (r4) */
+	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+	andc	r0,r30,r0		/* r0 = pte & ~r0 */
+	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */
+
+	/* We eventually do the icache sync here (maybe inline that
+	 * code rather than call a C function...)
+	 */
+BEGIN_FTR_SECTION
+	mr	r4,r30
+	mr	r5,r7
+	bl	.hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+	/* At this point, r3 contains new PP bits, save them in
+	 * place of "access" in the param area (sic)
+	 */
+	std	r3,STK_PARM(r4)(r1)
+
+	/* Get htab_hash_mask */
+	ld	r4,htab_hash_mask@got(2)
+	ld	r27,0(r4)	/* htab_hash_mask -> r27 */
+
+	/* Check if we may already be in the hashtable, in this case, we
+	 * go to out-of-line code to try to modify the HPTE
+	 */
+	andi.	r0,r31,_PAGE_HASHPTE
+	bne	htab_modify_pte
+
+htab_insert_pte:
+	/* Clear hpte bits in new pte (we also clear BUSY btw) and
+	 * add _PAGE_HASHPTE
+	 */
+	lis	r0,_PAGE_HPTEFLAGS@h
+	ori	r0,r0,_PAGE_HPTEFLAGS@l
+	andc	r30,r30,r0
+	ori	r30,r30,_PAGE_HASHPTE
+
+	/* page number in r5 */
+	rldicl	r5,r31,64-PTE_SHIFT,PTE_SHIFT
+
+	/* Calculate primary group hash */
+	and	r0,r28,r27
+	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
+
+	/* Call ppc_md.hpte_insert */
+	ld	r7,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
+	mr	r4,r29			/* Retrieve va */
+	li	r6,0			/* no vflags */
+_GLOBAL(htab_call_hpte_insert1)
+	bl	.			/* Will be patched by htab_finish_init() */
+	cmpdi	0,r3,0
+	bge	htab_pte_insert_ok	/* Insertion successful */
+	cmpdi	0,r3,-2			/* Critical failure */
+	beq-	htab_pte_insert_failure
+
+	/* Now try secondary slot */
+
+	/* page number in r5 */
+	rldicl	r5,r31,64-PTE_SHIFT,PTE_SHIFT
+
+	/* Calculate secondary group hash */
+	andc	r0,r27,r28
+	rldicr	r3,r0,3,63-3	/* r0 = (~hash & mask) << 3 */
+
+	/* Call ppc_md.hpte_insert */
+	ld	r7,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
+	mr	r4,r29			/* Retrieve va */
+	li	r6,HPTE_V_SECONDARY@l	/* secondary slot */
+_GLOBAL(htab_call_hpte_insert2)
+	bl	.			/* Will be patched by htab_finish_init() */
+	cmpdi	0,r3,0
+	bge+	htab_pte_insert_ok	/* Insertion successful */
+	cmpdi	0,r3,-2			/* Critical failure */
+	beq-	htab_pte_insert_failure
+
+	/* Both are full, we need to evict something */
+	mftb	r0
+	/* Pick a random group based on TB */
+	andi.	r0,r0,1
+	mr	r5,r28
+	bne	2f
+	not	r5,r5
+2:	and	r0,r5,r27
+	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
+	/* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+	bl	.			/* Will be patched by htab_finish_init() */
+
+	/* Try all again */
+	b	htab_insert_pte
+
+bail_ok:
+	li	r3,0
+	b	bail
+
+htab_pte_insert_ok:
+	/* Insert slot number & secondary bit in PTE */
+	rldimi	r30,r3,12,63-15
+
+	/* Write out the PTE with a normal write
+	 * (maybe adding an eieio here would still be good?)
+	 */
+htab_write_out_pte:
+	ld	r6,STK_PARM(r6)(r1)
+	std	r30,0(r6)
+	li	r3,0
+bail:
+	ld	r27,STK_REG(r27)(r1)
+	ld	r28,STK_REG(r28)(r1)
+	ld	r29,STK_REG(r29)(r1)
+	ld	r30,STK_REG(r30)(r1)
+	ld	r31,STK_REG(r31)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+	ld	r0,16(r1)
+	mtlr	r0
+	blr
+
+htab_modify_pte:
+	/* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+	mr	r4,r3
+	rlwinm	r3,r31,32-12,29,31
+
+	/* Secondary group? If yes, get an inverted hash value */
+	mr	r5,r28
+	andi. 
r0,r31,_PAGE_SECONDARY + beq 1f + not r5,r5 +1: + /* Calculate proper slot value for ppc_md.hpte_updatepp */ + and r0,r5,r27 + rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + add r3,r0,r3 /* add slot idx */ + + /* Call ppc_md.hpte_updatepp */ + mr r5,r29 /* va */ + li r6,0 /* large is 0 */ + ld r7,STK_PARM(r8)(r1) /* get "local" param */ +_GLOBAL(htab_call_hpte_updatepp) + bl . /* Will be patched by htab_finish_init() */ + + /* if we failed because typically the HPTE wasn't really here + * we try an insertion. + */ + cmpdi 0,r3,-1 + beq- htab_insert_pte + + /* Clear the BUSY bit and Write out the PTE */ + li r0,_PAGE_BUSY + andc r30,r30,r0 + b htab_write_out_pte + +htab_wrong_access: + /* Bail out clearing reservation */ + stdcx. r31,0,r6 + li r3,1 + b bail + +htab_pte_insert_failure: + /* Bail out restoring old PTE */ + ld r6,STK_PARM(r6)(r1) + std r31,0(r6) + li r3,-1 + b bail + + diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c new file mode 100644 index 00000000000..174d14576c2 --- /dev/null +++ b/arch/powerpc/mm/hash_native_64.c @@ -0,0 +1,446 @@ +/* + * native hashtable management. + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define HPTE_LOCK_BIT 3 + +static DEFINE_SPINLOCK(native_tlbie_lock); + +static inline void native_lock_hpte(hpte_t *hptep) +{ + unsigned long *word = &hptep->v; + + while (1) { + if (!test_and_set_bit(HPTE_LOCK_BIT, word)) + break; + while(test_bit(HPTE_LOCK_BIT, word)) + cpu_relax(); + } +} + +static inline void native_unlock_hpte(hpte_t *hptep) +{ + unsigned long *word = &hptep->v; + + asm volatile("lwsync":::"memory"); + clear_bit(HPTE_LOCK_BIT, word); +} + +long native_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long prpn, unsigned long vflags, + unsigned long rflags) +{ + hpte_t *hptep = htab_address + hpte_group; + unsigned long hpte_v, hpte_r; + int i; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + if (! (hptep->v & HPTE_V_VALID)) { + /* retry with lock held */ + native_lock_hpte(hptep); + if (! 
(hptep->v & HPTE_V_VALID)) + break; + native_unlock_hpte(hptep); + } + + hptep++; + } + + if (i == HPTES_PER_GROUP) + return -1; + + hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + va &= ~(1UL << HPTE_V_AVPN_SHIFT); + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + + hptep->r = hpte_r; + /* Guarantee the second dword is visible before the valid bit */ + __asm__ __volatile__ ("eieio" : : : "memory"); + /* + * Now set the first dword including the valid bit + * NOTE: this also unlocks the hpte + */ + hptep->v = hpte_v; + + __asm__ __volatile__ ("ptesync" : : : "memory"); + + return i | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static long native_hpte_remove(unsigned long hpte_group) +{ + hpte_t *hptep; + int i; + int slot_offset; + unsigned long hpte_v; + + /* pick a random entry to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + hpte_group + slot_offset; + hpte_v = hptep->v; + + if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) { + /* retry with lock held */ + native_lock_hpte(hptep); + hpte_v = hptep->v; + if ((hpte_v & HPTE_V_VALID) + && !(hpte_v & HPTE_V_BOLTED)) + break; + native_unlock_hpte(hptep); + } + + slot_offset++; + slot_offset &= 0x7; + } + + if (i == HPTES_PER_GROUP) + return -1; + + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + + return i; +} + +static inline void set_pp_bit(unsigned long pp, hpte_t *addr) +{ + unsigned long old; + unsigned long *p = &addr->r; + + __asm__ __volatile__( + "1: ldarx %0,0,%3\n\ + rldimi %0,%2,0,61\n\ + stdcx. %0,0,%3\n\ + bne 1b" + : "=&r" (old), "=m" (*p) + : "r" (pp), "r" (p), "m" (*p) + : "cc"); +} + +/* + * Only works on small pages. Yes its ugly to have to check each slot in + * the group but we only use this during bootup. + */ +static long native_hpte_find(unsigned long vpn) +{ + hpte_t *hptep; + unsigned long hash; + unsigned long i, j; + long slot; + unsigned long hpte_v; + + hash = hpt_hash(vpn, 0); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hptep = htab_address + slot; + hpte_v = hptep->v; + + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int large, int local) +{ + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; + unsigned long avpn = va >> 23; + int ret = 0; + + if (large) + avpn &= ~1; + + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + ret = -1; + } else { + set_pp_bit(newpp, hptep); + native_unlock_hpte(hptep); + } + + /* Ensure it is out of the tlb too */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + return ret; +} + +/* + * Update the page protection bits. Intended to be used to create + * guard pages for kernel data structures on pages which are bolted + * in the HPT. Assumes pages being operated on will not be stolen. 
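+ * (If the entry has been stolen after all, native_hpte_find() below
+ * fails and we panic, since a bolted mapping must always be present.)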
+ * Does not work on large pages. + * + * No need to lock here because we should be the only user. + */ +static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +{ + unsigned long vsid, va, vpn, flags = 0; + long slot; + hpte_t *hptep; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> PAGE_SHIFT; + + slot = native_hpte_find(vpn); + if (slot == -1) + panic("could not find page to bolt\n"); + hptep = htab_address + slot; + + set_pp_bit(newpp, hptep); + + /* Ensure it is out of the tlb too */ + if (lock_tlbie) + spin_lock_irqsave(&native_tlbie_lock, flags); + tlbie(va, 0); + if (lock_tlbie) + spin_unlock_irqrestore(&native_tlbie_lock, flags); +} + +static void native_hpte_invalidate(unsigned long slot, unsigned long va, + int large, int local) +{ + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; + unsigned long avpn = va >> 23; + unsigned long flags; + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (large) + avpn &= ~1; + + local_irq_save(flags); + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + } + + /* Invalidate the tlb */ + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + tlbiel(va); + } else { + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + tlbie(va, large); + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + local_irq_restore(flags); +} + +/* + * clear all mappings on kexec. All cpus are in real mode (or they will + * be when they isi), and we are the only one left. We rely on our kernel + * mapping being 0xC0's and the hardware ignoring those two real bits. + * + * TODO: add batching support when enabled. remember, no dynamic memory here, + * athough there is the control page available... + */ +static void native_hpte_clear(void) +{ + unsigned long slot, slots, flags; + hpte_t *hptep = htab_address; + unsigned long hpte_v; + unsigned long pteg_count; + + pteg_count = htab_hash_mask + 1; + + local_irq_save(flags); + + /* we take the tlbie lock and hold it. Some hardware will + * deadlock if we try to tlbie from two processors at once. + */ + spin_lock(&native_tlbie_lock); + + slots = pteg_count * HPTES_PER_GROUP; + + for (slot = 0; slot < slots; slot++, hptep++) { + /* + * we could lock the pte here, but we are the only cpu + * running, right? and for crash dump, we probably + * don't want to wait for a maybe bad cpu. 
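+ * Clearing hptep->v below both invalidates the entry and drops the
+ * per-HPTE lock, since HPTE_LOCK_BIT lives in that same doubleword.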
+ */ + hpte_v = hptep->v; + + if (hpte_v & HPTE_V_VALID) { + hptep->v = 0; + tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); + } + } + + spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); +} + +static void native_flush_hash_range(unsigned long number, int local) +{ + unsigned long va, vpn, hash, secondary, slot, flags, avpn; + int i, j; + hpte_t *hptep; + unsigned long hpte_v; + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + unsigned long large = batch->large; + + local_irq_save(flags); + + j = 0; + for (i = 0; i < number; i++) { + va = batch->vaddr[j]; + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + hash = hpt_hash(vpn, large); + secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; + if (secondary) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; + + hptep = htab_address + slot; + + avpn = va >> 23; + if (large) + avpn &= ~0x1UL; + + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { + native_unlock_hpte(hptep); + } else { + /* Invalidate the hpte. NOTE: this also unlocks it */ + hptep->v = 0; + } + + j++; + } + + if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbiel(batch->vaddr[i]); + + asm volatile("ptesync":::"memory"); + } else { + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock(&native_tlbie_lock); + + asm volatile("ptesync":::"memory"); + + for (i = 0; i < j; i++) + __tlbie(batch->vaddr[i], large); + + asm volatile("eieio; tlbsync; ptesync":::"memory"); + + if (lock_tlbie) + spin_unlock(&native_tlbie_lock); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_PPC_PSERIES +/* Disable TLB batching on nighthawk */ +static inline int tlb_batching_enabled(void) +{ + struct device_node *root = of_find_node_by_path("/"); + int enabled = 1; + + if (root) { + const char *model = get_property(root, "model", NULL); + if (model && !strcmp(model, "IBM,9076-N81")) + enabled = 0; + of_node_put(root); + } + + return enabled; +} +#else +static inline int tlb_batching_enabled(void) +{ + return 1; +} +#endif + +void hpte_init_native(void) +{ + ppc_md.hpte_invalidate = native_hpte_invalidate; + ppc_md.hpte_updatepp = native_hpte_updatepp; + ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; + ppc_md.hpte_insert = native_hpte_insert; + ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_clear_all = native_hpte_clear; + if (tlb_batching_enabled()) + ppc_md.flush_hash_range = native_flush_hash_range; + htab_finish_init(); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c new file mode 100644 index 00000000000..35dd93eeaf4 --- /dev/null +++ b/arch/powerpc/mm/hash_utils_64.c @@ -0,0 +1,438 @@ +/* + * PowerPC64 port by Mike Corrigan and Dave Engebretsen + * {mikejc|engebret}@us.ibm.com + * + * Copyright (c) 2000 Mike Corrigan + * + * SMP scalability work: + * Copyright (C) 2001 Anton Blanchard , IBM + * + * Module name: htab.c + * + * Description: + * PowerPC Hashed Page Table functions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * Note: pte --> Linux PTE + * HPTE --> PowerPC Hashed Page Table Entry + * + * Execution context: + * htab_initialize is called with the MMU off (of course), but + * the kernel has been copied down to zero so it can directly + * reference global data. At this point it is very difficult + * to print debug info. + * + */ + +#ifdef CONFIG_U3_DART +extern unsigned long dart_tablebase; +#endif /* CONFIG_U3_DART */ + +hpte_t *htab_address; +unsigned long htab_hash_mask; + +unsigned long _SDR1; + +#define KB (1024) +#define MB (1024*KB) + +static inline void loop_forever(void) +{ + volatile unsigned long x = 1; + for(;x;x|=1) + ; +} + +static inline void create_pte_mapping(unsigned long start, unsigned long end, + unsigned long mode, int large) +{ + unsigned long addr; + unsigned int step; + unsigned long tmp_mode; + unsigned long vflags; + + if (large) { + step = 16*MB; + vflags = HPTE_V_BOLTED | HPTE_V_LARGE; + } else { + step = 4*KB; + vflags = HPTE_V_BOLTED; + } + + for (addr = start; addr < end; addr += step) { + unsigned long vpn, hash, hpteg; + unsigned long vsid = get_kernel_vsid(addr); + unsigned long va = (vsid << 28) | (addr & 0xfffffff); + int ret = -1; + + if (large) + vpn = va >> HPAGE_SHIFT; + else + vpn = va >> PAGE_SHIFT; + + + tmp_mode = mode; + + /* Make non-kernel text non-executable */ + if (!in_kernel_text(addr)) + tmp_mode = mode | HW_NO_EXEC; + + hash = hpt_hash(vpn, large); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + +#ifdef CONFIG_PPC_ISERIES + if (systemcfg->platform & PLATFORM_ISERIES_LPAR) + ret = iSeries_hpte_bolt_or_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); + else +#endif +#ifdef CONFIG_PPC_PSERIES + if (systemcfg->platform & PLATFORM_LPAR) + ret = pSeries_lpar_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); + else +#endif +#ifdef CONFIG_PPC_MULTIPLATFORM + ret = native_hpte_insert(hpteg, va, + virt_to_abs(addr) >> PAGE_SHIFT, + vflags, tmp_mode); +#endif + + if (ret == -1) { + ppc64_terminate_msg(0x20, "create_pte_mapping"); + loop_forever(); + } + } +} + +void __init htab_initialize(void) +{ + unsigned long table, htab_size_bytes; + unsigned long pteg_count; + unsigned long mode_rw; + int i, use_largepages = 0; + unsigned long base = 0, size = 0; + extern unsigned long tce_alloc_start, tce_alloc_end; + + DBG(" -> htab_initialize()\n"); + + /* + * Calculate the required size of the htab. We want the number of + * PTEGs to equal one half the number of real pages. + */ + htab_size_bytes = 1UL << ppc64_pft_size; + pteg_count = htab_size_bytes >> 7; + + /* For debug, make the HTAB 1/8 as big as it normally would be. */ + ifppcdebug(PPCDBG_HTABSIZE) { + pteg_count >>= 3; + htab_size_bytes = pteg_count << 7; + } + + htab_hash_mask = pteg_count - 1; + + if (systemcfg->platform & PLATFORM_LPAR) { + /* Using a hypervisor which owns the htab */ + htab_address = NULL; + _SDR1 = 0; + } else { + /* Find storage for the HPT. Must be contiguous in + * the absolute address space. 
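+ * Passing htab_size_bytes as the alignment to lmb_alloc() below keeps
+ * the table naturally aligned, which the SDR1 encoding requires.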
+ */ + table = lmb_alloc(htab_size_bytes, htab_size_bytes); + + DBG("Hash table allocated at %lx, size: %lx\n", table, + htab_size_bytes); + + if ( !table ) { + ppc64_terminate_msg(0x20, "hpt space"); + loop_forever(); + } + htab_address = abs_to_virt(table); + + /* htab absolute addr + encoded htabsize */ + _SDR1 = table + __ilog2(pteg_count) - 11; + + /* Initialize the HPT with no entries */ + memset((void *)table, 0, htab_size_bytes); + } + + mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX; + + /* On U3 based machines, we need to reserve the DART area and + * _NOT_ map it to avoid cache paradoxes as it's remapped non + * cacheable later on + */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + use_largepages = 1; + + /* create bolted the linear mapping in the hash table */ + for (i=0; i < lmb.memory.cnt; i++) { + base = lmb.memory.region[i].base + KERNELBASE; + size = lmb.memory.region[i].size; + + DBG("creating mapping for region: %lx : %lx\n", base, size); + +#ifdef CONFIG_U3_DART + /* Do not map the DART space. Fortunately, it will be aligned + * in such a way that it will not cross two lmb regions and will + * fit within a single 16Mb page. + * The DART space is assumed to be a full 16Mb region even if we + * only use 2Mb of that space. We will use more of it later for + * AGP GART. We have to use a full 16Mb large page. + */ + DBG("DART base: %lx\n", dart_tablebase); + + if (dart_tablebase != 0 && dart_tablebase >= base + && dart_tablebase < (base + size)) { + if (base != dart_tablebase) + create_pte_mapping(base, dart_tablebase, mode_rw, + use_largepages); + if ((base + size) > (dart_tablebase + 16*MB)) + create_pte_mapping(dart_tablebase + 16*MB, base + size, + mode_rw, use_largepages); + continue; + } +#endif /* CONFIG_U3_DART */ + create_pte_mapping(base, base + size, mode_rw, use_largepages); + } + + /* + * If we have a memory_limit and we've allocated TCEs then we need to + * explicitly map the TCE area at the top of RAM. We also cope with the + * case that the TCEs start below memory_limit. + * tce_alloc_start/end are 16MB aligned so the mapping should work + * for either 4K or 16MB pages. + */ + if (tce_alloc_start) { + tce_alloc_start += KERNELBASE; + tce_alloc_end += KERNELBASE; + + if (base + size >= tce_alloc_start) + tce_alloc_start = base + size + 1; + + create_pte_mapping(tce_alloc_start, tce_alloc_end, + mode_rw, use_largepages); + } + + DBG(" <- htab_initialize()\n"); +} +#undef KB +#undef MB + +/* + * Called by asm hashtable.S for doing lazy icache flush + */ +unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) +{ + struct page *page; + + if (!pfn_valid(pte_pfn(pte))) + return pp; + + page = pte_page(pte); + + /* page is dirty */ + if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (trap == 0x400) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } else + pp |= HW_NO_EXEC; + } + return pp; +} + +/* Result code is: + * 0 - handled + * 1 - normal page fault + * -1 - critical hash insertion error + */ +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + void *pgdir; + unsigned long vsid; + struct mm_struct *mm; + pte_t *ptep; + int ret; + int user_region = 0; + int local = 0; + cpumask_t tmp; + + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 1; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + user_region = 1; + mm = current->mm; + if (! 
mm)
+			return 1;
+
+		vsid = get_vsid(mm->context.id, ea);
+		break;
+	case VMALLOC_REGION_ID:
+		mm = &init_mm;
+		vsid = get_kernel_vsid(ea);
+		break;
+#if 0
+	case KERNEL_REGION_ID:
+		/*
+		 * Should never get here - entire 0xC0... region is bolted.
+		 * Send the problem up to do_page_fault
+		 */
+#endif
+	default:
+		/* Not a valid range
+		 * Send the problem up to do_page_fault
+		 */
+		return 1;
+		break;
+	}
+
+	pgdir = mm->pgd;
+
+	if (pgdir == NULL)
+		return 1;
+
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	/* Is this a huge page? */
+	if (unlikely(in_hugepage_area(mm->context, ea)))
+		ret = hash_huge_page(mm, access, ea, vsid, local);
+	else {
+		ptep = find_linux_pte(pgdir, ea);
+		if (ptep == NULL)
+			return 1;
+		ret = __hash_page(ea, access, vsid, ptep, trap, local);
+	}
+
+	return ret;
+}
+
+void flush_hash_page(unsigned long va, pte_t pte, int local)
+{
+	unsigned long vpn, hash, secondary, slot;
+	unsigned long huge = pte_huge(pte);
+
+	if (huge)
+		vpn = va >> HPAGE_SHIFT;
+	else
+		vpn = va >> PAGE_SHIFT;
+	hash = hpt_hash(vpn, huge);
+	secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
+	if (secondary)
+		hash = ~hash;
+	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
+
+	ppc_md.hpte_invalidate(slot, va, huge, local);
+}
+
+void flush_hash_range(unsigned long number, int local)
+{
+	if (ppc_md.flush_hash_range) {
+		ppc_md.flush_hash_range(number, local);
+	} else {
+		int i;
+		struct ppc64_tlb_batch *batch =
+			&__get_cpu_var(ppc64_tlb_batch);
+
+		for (i = 0; i < number; i++)
+			flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+	}
+}
+
+static inline void make_bl(unsigned int *insn_addr, void *func)
+{
+	unsigned long funcp = *((unsigned long *)func);
+	int offset = funcp - (unsigned long)insn_addr;
+
+	*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
+	flush_icache_range((unsigned long)insn_addr, 4+
+			   (unsigned long)insn_addr);
+}
+
+/*
+ * low_hash_fault is called when the low-level hash code failed
+ * to insert a PTE due to a hypervisor error
+ */
+void low_hash_fault(struct pt_regs *regs, unsigned long address)
+{
+	if (user_mode(regs)) {
+		siginfo_t info;
+
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return;
+	}
+	bad_page_fault(regs, address, SIGBUS);
+}
+
+void __init htab_finish_init(void)
+{
+	extern unsigned int *htab_call_hpte_insert1;
+	extern unsigned int *htab_call_hpte_insert2;
+	extern unsigned int *htab_call_hpte_remove;
+	extern unsigned int *htab_call_hpte_updatepp;
+
+	make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
+	make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
+	make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
+	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
new file mode 100644
index 00000000000..0ea0994ed97
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -0,0 +1,745 @@
+/*
+ * PPC64 (POWER4) Huge TLB Page Support for Kernel.
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation. 
+ * + * Based on the IA-32 version: + * Copyright (C) 2002, Rohit Seth + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) +#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) + +/* Modelled after find_linux_pte() */ +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + addr &= HPAGE_MASK; + + pg = pgd_offset(mm, addr); + if (!pgd_none(*pg)) { + pu = pud_offset(pg, addr); + if (!pud_none(*pu)) { + pm = pmd_offset(pu, addr); + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; + + BUG_ON(! in_hugepage_area(mm->context, addr)); + + addr &= HPAGE_MASK; + + pg = pgd_offset(mm, addr); + pu = pud_alloc(mm, pg, addr); + + if (pu) { + pm = pmd_alloc(mm, pu, addr); + if (pm) { + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + int i; + + if (pte_present(*ptep)) { + pte_clear(mm, addr, ptep); + flush_tlb_pending(); + } + + for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + ptep++; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + int i; + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, old, 0); + + for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) + ptep[i] = __pte(0); + + return __pte(old); +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + if (! (within_hugepage_low_range(addr, len) + || within_hugepage_high_range(addr, len)) ) + return -EINVAL; + return 0; +} + +static void flush_low_segments(void *parm) +{ + u16 areas = (unsigned long) parm; + unsigned long i; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + + for (i = 0; i < NUM_LOW_AREAS; i++) { + if (! (areas & (1U << i))) + continue; + asm volatile("slbie %0" + : : "r" ((i << SID_SHIFT) | SLBIE_C)); + } + + asm volatile("isync" : : : "memory"); +} + +static void flush_high_segments(void *parm) +{ + u16 areas = (unsigned long) parm; + unsigned long i, j; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + + for (i = 0; i < NUM_HIGH_AREAS; i++) { + if (! 
(areas & (1U << i))) + continue; + for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) + asm volatile("slbie %0" + :: "r" (((i << HTLB_AREA_SHIFT) + + (j << SID_SHIFT)) | SLBIE_C)); + } + + asm volatile("isync" : : : "memory"); +} + +static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << SID_SHIFT; + unsigned long end = (area+1) << SID_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_LOW_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << HTLB_AREA_SHIFT; + unsigned long end = (area+1) << HTLB_AREA_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_HIGH_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); + BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); + + newareas &= ~(mm->context.low_htlb_areas); + if (! newareas) + return 0; /* The segments we want are already open */ + + for (i = 0; i < NUM_LOW_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_low_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.low_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) + != NUM_HIGH_AREAS); + + newareas &= ~(mm->context.high_htlb_areas); + if (! newareas) + return 0; /* The areas we want are already open */ + + for (i = 0; i < NUM_HIGH_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_high_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.high_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +int prepare_hugepage_range(unsigned long addr, unsigned long len) +{ + int err; + + if ( (addr+len) < addr ) + return -EINVAL; + + if ((addr + len) < 0x100000000UL) + err = open_low_hpage_areas(current->mm, + LOW_ESID_MASK(addr, len)); + else + err = open_high_hpage_areas(current->mm, + HTLB_AREA_MASK(addr, len)); + if (err) { + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", + addr, len, + LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); + return err; + } + + return 0; +} + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + pte_t *ptep; + struct page *page; + + if (! 
in_hugepage_area(mm->context, address)) + return ERR_PTR(-EINVAL); + + ptep = huge_pte_offset(mm, address); + page = pte_page(*ptep); + if (page) + page += (address % HPAGE_SIZE) / PAGE_SIZE; + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + BUG(); + return NULL; +} + +/* Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. */ +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (((TASK_SIZE - len) >= addr) + && (!vma || (addr+len) <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + +full_search: + vma = find_vma(mm, addr); + while (TASK_SIZE - len >= addr) { + BUG_ON(vma && (addr >= vma->vm_end)); + + if (touches_hugepage_low_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1<vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + vma = vma->vm_next; + } + + /* Make sure we didn't miss any holes */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; +} + +/* + * This mmap-allocator allocates new areas top-down from below the + * stack's low limit (the base): + * + * Because we have an exclusive hugepage region which lies within the + * normal user address space, we have to take special measures to make + * non-huge mmap()s evade the hugepage reserved regions. 
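+ * The search below walks down from the base, stepping past any
+ * hugepage area it touches, and falls back to the bottom-up
+ * allocator if it runs out of room.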
+ */ +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0; + unsigned long largest_hole = mm->cached_hole_size; + int first_time = 1; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + /* dont allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start) + && !is_hugepage_only_range(mm, addr,len)) + return addr; + } + + if (len <= largest_hole) { + largest_hole = 0; + mm->free_area_cache = base; + } +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & PAGE_MASK; + do { +hugepage_recheck: + if (touches_hugepage_low_range(mm, addr, len)) { + addr = (addr & ((~0) << SID_SHIFT)) - len; + goto hugepage_recheck; + } else if (touches_hugepage_high_range(mm, addr, len)) { + addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; + goto hugepage_recheck; + } + + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr+len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) { + /* remember the address as a hint for next time */ + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } else { + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; + } + } + + /* remember the largest hole we saw so far */ + if (addr + largest_hole < vma->vm_start) + largest_hole = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + largest_hole = 0; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) +{ + unsigned long addr = 0; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + while (addr + len <= 0x100000000UL) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + + if (! 
__within_hugepage_low_range(addr, len, segmask)) { + addr = ALIGN(addr+1, 1<mm, addr); + continue; + } + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); + } + + return -ENOMEM; +} + +static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) +{ + unsigned long addr = 0x100000000UL; + struct vm_area_struct *vma; + + vma = find_vma(current->mm, addr); + while (addr + len <= TASK_SIZE_USER64) { + BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ + + if (! __within_hugepage_high_range(addr, len, areamask)) { + addr = ALIGN(addr+1, 1UL<mm, addr); + continue; + } + + if (!vma || (addr + len) <= vma->vm_start) + return addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); + } + + return -ENOMEM; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + int lastshift; + u16 areamask, curareas; + + if (len & ~HPAGE_MASK) + return -EINVAL; + + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) + return -EINVAL; + + if (test_thread_flag(TIF_32BIT)) { + curareas = current->mm->context.low_htlb_areas; + + /* First see if we can do the mapping in the existing + * low areas */ + addr = htlb_get_low_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_low_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_low_hpage_areas(current->mm, areamask) == 0) + return addr; + } + } else { + curareas = current->mm->context.high_htlb_areas; + + /* First see if we can do the mapping in the existing + * high areas */ + addr = htlb_get_high_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_high_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_high_hpage_areas(current->mm, areamask) == 0) + return addr; + } + } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough areas\n"); + return -ENOMEM; +} + +int hash_huge_page(struct mm_struct *mm, unsigned long access, + unsigned long ea, unsigned long vsid, int local) +{ + pte_t *ptep; + unsigned long va, vpn; + pte_t old_pte, new_pte; + unsigned long rflags, prpn; + long slot; + int err = 1; + + spin_lock(&mm->page_table_lock); + + ptep = huge_pte_offset(mm, ea); + + /* Search the Linux page table for a match with va */ + va = (vsid << 28) | (ea & 0x0fffffff); + vpn = va >> HPAGE_SHIFT; + + /* + * If no pte found or not present, send the problem up to + * do_page_fault + */ + if (unlikely(!ptep || pte_none(*ptep))) + goto out; + +/* BUG_ON(pte_bad(*ptep)); */ + + /* + * Check the user's access rights to the page. If access should be + * prevented then send the problem up to do_page_fault. + */ + if (unlikely(access & ~pte_val(*ptep))) + goto out; + /* + * At this point, we have a pte (old_pte) which can be used to build + * or update an HPTE. There are 2 cases: + * + * 1. 
There is a valid (present) pte with no associated HPTE (this is + * the most common case) + * 2. There is a valid (present) pte with an associated HPTE. The + * current values of the pp bits in the HPTE prevent access + * because we are doing software DIRTY bit management and the + * page is currently not DIRTY. + */ + + + old_pte = *ptep; + new_pte = old_pte; + + rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ + rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + + /* Check if pte already has an hpte (case 2) */ + if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { + /* There MIGHT be an HPTE for this pte */ + unsigned long hash, slot; + + hash = hpt_hash(vpn, 1); + if (pte_val(old_pte) & _PAGE_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + + if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) + pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + } + + if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { + unsigned long hash = hpt_hash(vpn, 1); + unsigned long hpte_group; + + prpn = pte_pfn(old_pte); + +repeat: + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + + /* Update the linux pte with the HPTE slot */ + pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; + pte_val(new_pte) |= _PAGE_HASHPTE; + + /* Add in WIMG bits */ + /* XXX We should store these in the pte */ + rflags |= _PAGE_COHERENT; + + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + HPTE_V_LARGE, rflags); + + /* Primary is full, try the secondary */ + if (unlikely(slot == -1)) { + pte_val(new_pte) |= _PAGE_SECONDARY; + hpte_group = ((~hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + HPTE_V_LARGE | + HPTE_V_SECONDARY, + rflags); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP)&~0x7UL; + + ppc_md.hpte_remove(hpte_group); + goto repeat; + } + } + + if (unlikely(slot == -2)) + panic("hash_huge_page: pte_insert failed\n"); + + pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; + + /* + * No need to use ldarx/stdcx here because all who + * might be updating the pte will hold the + * page_table_lock + */ + *ptep = new_pte; + } + + err = 0; + + out: + spin_unlock(&mm->page_table_lock); + + return err; +} diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c new file mode 100644 index 00000000000..c65b87b9275 --- /dev/null +++ b/arch/powerpc/mm/imalloc.c @@ -0,0 +1,317 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +static DECLARE_MUTEX(imlist_sem); +struct vm_struct * imlist = NULL; + +static int get_free_im_addr(unsigned long size, unsigned long *im_addr) +{ + unsigned long addr; + struct vm_struct **p, *tmp; + + addr = ioremap_bot; + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (size + addr < (unsigned long) tmp->addr) + break; + if ((unsigned long)tmp->addr >= ioremap_bot) + addr = tmp->size + (unsigned long) tmp->addr; + if (addr >= IMALLOC_END-size) + return 1; + } + *im_addr = addr; + + return 0; +} + +/* Return whether the region described by v_addr and size is a subset + * of the region described by parent + */ +static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, + struct vm_struct *parent) +{ + return (int) (v_addr >= (unsigned long) parent->addr && + v_addr < (unsigned long) parent->addr + parent->size && + size < parent->size); +} + +/* Return whether the region described by v_addr and size is a superset + * of the region described by child + */ +static int im_region_is_superset(unsigned long v_addr, unsigned long size, + struct vm_struct *child) +{ + struct vm_struct parent; + + parent.addr = (void *) v_addr; + parent.size = size; + + return im_region_is_subset((unsigned long) child->addr, child->size, + &parent); +} + +/* Return whether the region described by v_addr and size overlaps + * the region described by vm. Overlapping regions meet the + * following conditions: + * 1) The regions share some part of the address space + * 2) The regions aren't identical + * 3) Neither region is a subset of the other + */ +static int im_region_overlaps(unsigned long v_addr, unsigned long size, + struct vm_struct *vm) +{ + if (im_region_is_superset(v_addr, size, vm)) + return 0; + + return (v_addr + size > (unsigned long) vm->addr + vm->size && + v_addr < (unsigned long) vm->addr + vm->size) || + (v_addr < (unsigned long) vm->addr && + v_addr + size > (unsigned long) vm->addr); +} + +/* Determine imalloc status of region described by v_addr and size. + * Can return one of the following: + * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. + * IM_REGION_SUBSET - Region is a subset of a region that is already + * allocated in imalloc space. + * vm will be assigned to a ptr to the parent region. + * IM_REGION_EXISTS - Exact region already allocated in imalloc space. + * vm will be assigned to a ptr to the existing imlist + * member. + * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. + * IM_REGION_SUPERSET - Region is a superset of a region that is already + * allocated in imalloc space. 
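+ * vm will be assigned to a ptr to the first
+ * existing subset of the requested region.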
+ */ +static int im_region_status(unsigned long v_addr, unsigned long size, + struct vm_struct **vm) +{ + struct vm_struct *tmp; + + for (tmp = imlist; tmp; tmp = tmp->next) + if (v_addr < (unsigned long) tmp->addr + tmp->size) + break; + + if (tmp) { + if (im_region_overlaps(v_addr, size, tmp)) + return IM_REGION_OVERLAP; + + *vm = tmp; + if (im_region_is_subset(v_addr, size, tmp)) { + /* Return with tmp pointing to superset */ + return IM_REGION_SUBSET; + } + if (im_region_is_superset(v_addr, size, tmp)) { + /* Return with tmp pointing to first subset */ + return IM_REGION_SUPERSET; + } + else if (v_addr == (unsigned long) tmp->addr && + size == tmp->size) { + /* Return with tmp pointing to exact region */ + return IM_REGION_EXISTS; + } + } + + *vm = NULL; + return IM_REGION_UNUSED; +} + +static struct vm_struct * split_im_region(unsigned long v_addr, + unsigned long size, struct vm_struct *parent) +{ + struct vm_struct *vm1 = NULL; + struct vm_struct *vm2 = NULL; + struct vm_struct *new_vm = NULL; + + vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL); + if (vm1 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + return NULL; + } + + if (v_addr == (unsigned long) parent->addr) { + /* Use existing parent vm_struct to represent child, allocate + * new one for the remainder of parent range + */ + vm1->size = parent->size - size; + vm1->addr = (void *) (v_addr + size); + vm1->next = parent->next; + + parent->size = size; + parent->next = vm1; + new_vm = parent; + } else if (v_addr + size == (unsigned long) parent->addr + + parent->size) { + /* Allocate new vm_struct to represent child, use existing + * parent one for remainder of parent range + */ + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = parent->next; + new_vm = vm1; + + parent->size -= size; + parent->next = vm1; + } else { + /* Allocate two new vm_structs for the new child and + * uppermost remainder, and use existing parent one for the + * lower remainder of parent range + */ + vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL); + if (vm2 == NULL) { + printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); + kfree(vm1); + return NULL; + } + + vm1->size = size; + vm1->addr = (void *) v_addr; + vm1->next = vm2; + new_vm = vm1; + + vm2->size = ((unsigned long) parent->addr + parent->size) - + (v_addr + size); + vm2->addr = (void *) v_addr + size; + vm2->next = parent->next; + + parent->size = v_addr - (unsigned long) parent->addr; + parent->next = vm1; + } + + return new_vm; +} + +static struct vm_struct * __add_new_im_area(unsigned long req_addr, + unsigned long size) +{ + struct vm_struct **p, *tmp, *area; + + for (p = &imlist; (tmp = *p) ; p = &tmp->next) { + if (req_addr + size <= (unsigned long)tmp->addr) + break; + } + + area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + area->flags = 0; + area->addr = (void *)req_addr; + area->size = size; + area->next = *p; + *p = area; + + return area; +} + +static struct vm_struct * __im_get_area(unsigned long req_addr, + unsigned long size, + int criteria) +{ + struct vm_struct *tmp; + int status; + + status = im_region_status(req_addr, size, &tmp); + if ((criteria & status) == 0) { + return NULL; + } + + switch (status) { + case IM_REGION_UNUSED: + tmp = __add_new_im_area(req_addr, size); + break; + case IM_REGION_SUBSET: + tmp = split_im_region(req_addr, size, tmp); + break; + case IM_REGION_EXISTS: + /* Return requested region */ + break; + case IM_REGION_SUPERSET: + /* Return first existing 
subset of requested region */ + break; + default: + printk(KERN_ERR "%s() unexpected imalloc region status\n", + __FUNCTION__); + tmp = NULL; + } + + return tmp; +} + +struct vm_struct * im_get_free_area(unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + + down(&imlist_sem); + if (get_free_im_addr(size, &addr)) { + printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", + __FUNCTION__, size); + area = NULL; + goto next_im_done; + } + + area = __im_get_area(addr, size, IM_REGION_UNUSED); + if (area == NULL) { + printk(KERN_ERR + "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", + __FUNCTION__, addr, size); + } +next_im_done: + up(&imlist_sem); + return area; +} + +struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, + int criteria) +{ + struct vm_struct *area; + + down(&imlist_sem); + area = __im_get_area(v_addr, size, criteria); + up(&imlist_sem); + return area; +} + +void im_free(void * addr) +{ + struct vm_struct **p, *tmp; + + if (!addr) + return; + if ((unsigned long) addr & ~PAGE_MASK) { + printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); + return; + } + down(&imlist_sem); + for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { + if (tmp->addr == addr) { + *p = tmp->next; + + /* XXX: do we need the lock? */ + spin_lock(&init_mm.page_table_lock); + unmap_vm_area(tmp); + spin_unlock(&init_mm.page_table_lock); + + kfree(tmp); + up(&imlist_sem); + return; + } + } + up(&imlist_sem); + printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, + addr); +} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c0ce6a7af3c..b0fc822ec29 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -73,18 +73,8 @@ #warning TASK_SIZE is smaller than it needs to be. 
#endif -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - unsigned long klimit = (unsigned long)_end; -unsigned long _SDR1=0; -unsigned long _ASR=0; - /* max amount of RAM to use */ unsigned long __max_memory; @@ -193,19 +183,6 @@ static int __init setup_kcore(void) } module_init(setup_kcore); -void __iomem * reserve_phb_iospace(unsigned long size) -{ - void __iomem *virt_addr; - - if (phbs_io_bot >= IMALLOC_BASE) - panic("reserve_phb_iospace(): phb io space overflow\n"); - - virt_addr = (void __iomem *) phbs_io_bot; - phbs_io_bot += size; - - return virt_addr; -} - static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) { memset(addr, 0, kmem_cache_size(cache)); @@ -244,16 +221,3 @@ void pgtable_cache_init(void) name); } } - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, - unsigned long size, pgprot_t vma_prot) -{ - if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); - - if (!page_is_ram(addr >> PAGE_SHIFT)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); - return vma_prot; -} -EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0650de74d0b..55b5860ed3c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -47,6 +47,9 @@ #include #include #include +#ifdef CONFIG_PPC64 +#include +#endif #include "mmu_decl.h" @@ -334,7 +337,7 @@ void flush_dcache_icache_page(struct page *page) void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); __flush_dcache_icache(start); kunmap_atomic(start, KM_PPC_SYNC_ICACHE); -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); #else @@ -463,18 +466,18 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, if (pgdir == NULL) return; - ptep = find_linux_pte(pgdir, ea); + ptep = find_linux_pte(pgdir, address); if (!ptep) return; - vsid = get_vsid(vma->vm_mm->context.id, ea); + vsid = get_vsid(vma->vm_mm->context.id, address); local_irq_save(flags); tmp = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) local = 1; - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + __hash_page(address, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, 0x300, local); local_irq_restore(flags); #endif diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c new file mode 100644 index 00000000000..fe65f522aff --- /dev/null +++ b/arch/powerpc/mm/mmap.c @@ -0,0 +1,86 @@ +/* + * linux/arch/ppc64/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar + */ + +#include +#include + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return TASK_SIZE - (gap & PAGE_MASK); +} + +static inline int mmap_is_legacy(void) +{ + /* + * Force standard allocation for 64 bit programs. + */ + if (!test_thread_flag(TIF_32BIT)) + return 1; + + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 06fe8af3af5..a4d7a327c0e 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -22,11 +22,11 @@ #include #include +#ifdef CONFIG_PPC32 extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); extern void setbat(int index, unsigned long virt, unsigned long phys, unsigned int size, int flags); -extern void reserve_phys_mem(unsigned long start, unsigned long size); extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags, unsigned int pid); extern void invalidate_tlbcam_entry(int index); @@ -36,16 +36,16 @@ extern unsigned long ioremap_base; extern unsigned long ioremap_bot; extern unsigned int rtas_data, rtas_size; -extern unsigned long __max_low_memory; -extern unsigned long __initial_memory_limit; -extern unsigned long total_memory; -extern unsigned long total_lowmem; -extern int mem_init_done; - extern PTE *Hash, *Hash_end; extern unsigned long Hash_size, Hash_mask; extern unsigned int num_tlbcam_entries; +#endif + +extern unsigned long __max_low_memory; +extern unsigned long __initial_memory_limit; +extern unsigned long total_memory; +extern unsigned long total_lowmem; /* ...and now those things that may be slightly different between processor * architectures. 
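A note on the mmap.c hunk above: mmap_base() derives the top-down mmap base from the stack rlimit, clamped between MIN_GAP and MAX_GAP. A standalone sketch of that clamping follows; TASK_SIZE and PAGE_MASK here are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define PAGE_MASK   (~0xfffUL)
#define TASK_SIZE   (1UL << 46)            /* illustrative only */
#define MIN_GAP     (128 * 1024 * 1024UL)
#define MAX_GAP     (TASK_SIZE / 6 * 5)

/* Clamp the stack gap and subtract it from the task size,
 * mirroring mmap_base() in the hunk above. */
static unsigned long mmap_base(unsigned long stack_rlimit)
{
        unsigned long gap = stack_rlimit;

        if (gap < MIN_GAP)
                gap = MIN_GAP;
        else if (gap > MAX_GAP)
                gap = MAX_GAP;

        return TASK_SIZE - (gap & PAGE_MASK);
}

int main(void)
{
        /* An 8MB stack rlimit is clamped up to the 128MB minimum gap. */
        printf("base = %#lx\n", mmap_base(8 * 1024 * 1024UL));
        return 0;
}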
-- Dan @@ -66,8 +66,8 @@ extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(void); extern void adjust_total_lowmem(void); -#else -/* anything except 4xx or 8xx */ +#elif defined(CONFIG_PPC32) +/* anything 32-bit except 4xx or 8xx */ extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(void); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c new file mode 100644 index 00000000000..cb864b8f275 --- /dev/null +++ b/arch/powerpc/mm/numa.c @@ -0,0 +1,779 @@ +/* + * pSeries NUMA support + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int numa_enabled = 1; + +static int numa_debug; +#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } + +#ifdef DEBUG_NUMA +#define ARRAY_INITIALISER -1 +#else +#define ARRAY_INITIALISER 0 +#endif + +int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = + ARRAY_INITIALISER}; +char *numa_memory_lookup_table; +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; +int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; + +struct pglist_data *node_data[MAX_NUMNODES]; +bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; +static int min_common_depth; + +/* + * We need somewhere to store start/span for each node until we have + * allocated the real node_data structures. + */ +static struct { + unsigned long node_start_pfn; + unsigned long node_end_pfn; + unsigned long node_present_pages; +} init_node_data[MAX_NUMNODES] __initdata; + +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_memory_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(nr_cpus_in_node); + +static inline void map_cpu_to_node(int cpu, int node) +{ + numa_cpu_lookup_table[cpu] = node; + if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { + cpu_set(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]++; + } +} + +#ifdef CONFIG_HOTPLUG_CPU +static void unmap_cpu_from_node(unsigned long cpu) +{ + int node = numa_cpu_lookup_table[cpu]; + + dbg("removing cpu %lu from node %d\n", cpu, node); + + if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + cpu_clear(cpu, numa_cpumask_lookup_table[node]); + nr_cpus_in_node[node]--; + } else { + printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", + cpu, node); + } +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static struct device_node * __devinit find_cpu_node(unsigned int cpu) +{ + unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); + struct device_node *cpu_node = NULL; + unsigned int *interrupt_server, *reg; + int len; + + while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { + /* Try interrupt server first */ + interrupt_server = (unsigned int *)get_property(cpu_node, + "ibm,ppc-interrupt-server#s", &len); + + len = len / sizeof(u32); + + if (interrupt_server && (len > 0)) { + while (len--) { + if (interrupt_server[len] == hw_cpuid) + return cpu_node; + } + } else { + reg = (unsigned int *)get_property(cpu_node, + "reg", &len); + if (reg && (len > 0) && (reg[0] == hw_cpuid)) + return cpu_node; + } + } + + return NULL; +} + +/* must hold reference to node during call */ +static int *of_get_associativity(struct 
device_node *dev) +{ + return (unsigned int *)get_property(dev, "ibm,associativity", NULL); +} + +static int of_node_numa_domain(struct device_node *device) +{ + int numa_domain; + unsigned int *tmp; + + if (min_common_depth == -1) + return 0; + + tmp = of_get_associativity(device); + if (tmp && (tmp[0] >= min_common_depth)) { + numa_domain = tmp[min_common_depth]; + } else { + dbg("WARNING: no NUMA information for %s\n", + device->full_name); + numa_domain = 0; + } + return numa_domain; +} + +/* + * In theory, the "ibm,associativity" property may contain multiple + * associativity lists because a resource may be multiply connected + * into the machine. This resource then has different associativity + * characteristics relative to its multiple connections. We ignore + * this for now. We also assume that all cpu and memory sets have + * their distances represented at a common level. This won't be + * true for hierarchical NUMA. + * + * In any case the ibm,associativity-reference-points should give + * the correct depth for a normal NUMA system. + * + * - Dave Hansen + */ +static int __init find_min_common_depth(void) +{ + int depth; + unsigned int *ref_points; + struct device_node *rtas_root; + unsigned int len; + + rtas_root = of_find_node_by_path("/rtas"); + + if (!rtas_root) + return -1; + + /* + * This property is two 32-bit integers, each representing a level of + * depth in the associativity nodes. The first is for an SMP + * configuration (should be all 0's) and the second is for a normal + * NUMA configuration. + */ + ref_points = (unsigned int *)get_property(rtas_root, + "ibm,associativity-reference-points", &len); + + if ((len >= 1) && ref_points) { + depth = ref_points[1]; + } else { + dbg("WARNING: could not find NUMA " + "associativity reference point\n"); + depth = -1; + } + of_node_put(rtas_root); + + return depth; +} + +static int __init get_mem_addr_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + + rc = prom_n_addr_cells(memory); + return rc; +} + +static int __init get_mem_size_cells(void) +{ + struct device_node *memory = NULL; + int rc; + + memory = of_find_node_by_type(memory, "memory"); + if (!memory) + return 0; /* it won't matter */ + rc = prom_n_size_cells(memory); + return rc; +} + +static unsigned long read_n_cells(int n, unsigned int **buf) +{ + unsigned long result = 0; + + while (n--) { + result = (result << 32) | **buf; + (*buf)++; + } + return result; +} + +/* + * Figure out to which domain a cpu belongs and stick it there. + * Return the id of the domain used. + */ +static int numa_setup_cpu(unsigned long lcpu) +{ + int numa_domain = 0; + struct device_node *cpu = find_cpu_node(lcpu); + + if (!cpu) { + WARN_ON(1); + goto out; + } + + numa_domain = of_node_numa_domain(cpu); + + if (numa_domain >= num_online_nodes()) { + /* + * POWER4 LPAR uses 0xffff as invalid node, + * don't warn in this case. + */
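read_n_cells() above folds n big-endian 32-bit device-tree cells into a single value, advancing the caller's cursor so an address read can be followed directly by a size read. A userspace sketch of the same walk over a hypothetical three-cell "reg" property:

#include <stdio.h>
#include <stdint.h>

/* Fold n 32-bit cells into one value and advance the cursor,
 * as read_n_cells() does for device-tree "reg" properties. */
static uint64_t read_n_cells(int n, const uint32_t **buf)
{
        uint64_t result = 0;

        while (n--) {
                result = (result << 32) | **buf;
                (*buf)++;
        }
        return result;
}

int main(void)
{
        /* A 2-cell address followed by a 1-cell size. */
        const uint32_t reg[] = { 0x1, 0x80000000, 0x10000000 };
        const uint32_t *p = reg;

        printf("start=%#llx size=%#llx\n",
               (unsigned long long)read_n_cells(2, &p),
               (unsigned long long)read_n_cells(1, &p));
        return 0;
}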
+ if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: cpu %ld " + "maps to invalid NUMA node %d\n", + lcpu, numa_domain); + numa_domain = 0; + } +out: + node_set_online(numa_domain); + + map_cpu_to_node(lcpu, numa_domain); + + of_node_put(cpu); + + return numa_domain; +} + +static int cpu_numa_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned long lcpu = (unsigned long)hcpu; + int ret = NOTIFY_DONE; + + switch (action) { + case CPU_UP_PREPARE: + if (min_common_depth == -1 || !numa_enabled) + map_cpu_to_node(lcpu, 0); + else + numa_setup_cpu(lcpu); + ret = NOTIFY_OK; + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + case CPU_UP_CANCELED: + unmap_cpu_from_node(lcpu); + ret = NOTIFY_OK; + break; +#endif + } + return ret; +} + +/* + * Check and possibly modify a memory region to enforce the memory limit. + * + * Returns the size the region should have to enforce the memory limit. + * This will either be the original value of size, a truncated value, + * or zero. If the returned value of size is 0 the region should be + * discarded as it lies wholly above the memory limit. + */ +static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) +{ + /* + * We use lmb_end_of_DRAM() in here instead of memory_limit because + * we've already adjusted it for the limit and it takes care of + * having memory holes below the limit. + */ + extern unsigned long memory_limit; + + if (! memory_limit) + return size; + + if (start + size <= lmb_end_of_DRAM()) + return size; + + if (start >= lmb_end_of_DRAM()) + return 0; + + return lmb_end_of_DRAM() - start; +} + +static int __init parse_numa_properties(void) +{ + struct device_node *cpu = NULL; + struct device_node *memory = NULL; + int addr_cells, size_cells; + int max_domain = 0; + long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; + unsigned long i; + + if (numa_enabled == 0) { + printk(KERN_WARNING "NUMA disabled by user\n"); + return -1; + } + + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + + min_common_depth = find_min_common_depth(); + + dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + if (min_common_depth < 0) + return min_common_depth; + + max_domain = numa_setup_cpu(boot_cpuid); + + /* + * Even though we connect cpus to numa domains later in SMP init, + * we need to know the maximum node id now. This is because each + * node id must have NODE_DATA etc backing it. + * As a result of hotplug we could still have cpus appear later on + * with larger node ids. In that case we force the cpu into node 0.
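numa_enforce_memory_limit() above reduces each region to one of keep, truncate, or discard, measured against the end of usable DRAM. The same three-way clamp as a standalone function, with an assumed 1GB limit in the usage below:

#include <stdio.h>

/* Clamp a [start, start+size) region against an upper bound,
 * mirroring numa_enforce_memory_limit(): keep it, truncate it,
 * or discard it (return 0) if it lies wholly above the limit. */
static unsigned long enforce_limit(unsigned long start, unsigned long size,
                                   unsigned long end_of_dram)
{
        if (start + size <= end_of_dram)
                return size;
        if (start >= end_of_dram)
                return 0;
        return end_of_dram - start;
}

int main(void)
{
        unsigned long limit = 0x40000000;       /* pretend 1GB of usable DRAM */

        printf("%lx\n", enforce_limit(0x00000000, 0x10000000, limit)); /* kept */
        printf("%lx\n", enforce_limit(0x38000000, 0x10000000, limit)); /* truncated */
        printf("%lx\n", enforce_limit(0x50000000, 0x10000000, limit)); /* discarded */
        return 0;
}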
+ */ + for_each_cpu(i) { + int numa_domain; + + cpu = find_cpu_node(i); + + if (cpu) { + numa_domain = of_node_numa_domain(cpu); + of_node_put(cpu); + + if (numa_domain < MAX_NUMNODES && + max_domain < numa_domain) + max_domain = numa_domain; + } + } + + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long start; + unsigned long size; + int numa_domain; + int ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; +new_range: + /* these are order-sensitive, and modify the buffer pointer */ + start = read_n_cells(addr_cells, &memcell_buf); + size = read_n_cells(size_cells, &memcell_buf); + + start = _ALIGN_DOWN(start, MEMORY_INCREMENT); + size = _ALIGN_UP(size, MEMORY_INCREMENT); + + numa_domain = of_node_numa_domain(memory); + + if (numa_domain >= MAX_NUMNODES) { + if (numa_domain != 0xffff) + printk(KERN_ERR "WARNING: memory at %lx maps " + "to invalid NUMA node %d\n", start, + numa_domain); + numa_domain = 0; + } + + if (max_domain < numa_domain) + max_domain = numa_domain; + + if (! (size = numa_enforce_memory_limit(start, size))) { + if (--ranges) + goto new_range; + else + continue; + } + + /* + * Initialize new node struct, or add to an existing one. + */ + if (init_node_data[numa_domain].node_end_pfn) { + if ((start / PAGE_SIZE) < + init_node_data[numa_domain].node_start_pfn) + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > + init_node_data[numa_domain].node_end_pfn) + init_node_data[numa_domain].node_end_pfn = + (start / PAGE_SIZE) + + (size / PAGE_SIZE); + + init_node_data[numa_domain].node_present_pages += + size / PAGE_SIZE; + } else { + node_set_online(numa_domain); + + init_node_data[numa_domain].node_start_pfn = + start / PAGE_SIZE; + init_node_data[numa_domain].node_end_pfn = + init_node_data[numa_domain].node_start_pfn + + size / PAGE_SIZE; + init_node_data[numa_domain].node_present_pages = + size / PAGE_SIZE; + } + + for (i = start ; i < (start+size); i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = + numa_domain; + + if (--ranges) + goto new_range; + } + + for (i = 0; i <= max_domain; i++) + node_set_online(i); + + return 0; +} + +static void __init setup_nonnuma(void) +{ + unsigned long top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long i; + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + + if (!numa_memory_lookup_table) { + long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; + numa_memory_lookup_table = + (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); + memset(numa_memory_lookup_table, 0, entries * sizeof(char)); + for (i = 0; i < entries ; i++) + numa_memory_lookup_table[i] = ARRAY_INITIALISER; + } + + map_cpu_to_node(boot_cpuid, 0); + + node_set_online(0); + + init_node_data[0].node_start_pfn = 0; + init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; + init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; + + for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) + numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; +} + +static void __init dump_numa_topology(void) +{ + unsigned int node; + unsigned int 
count; + + if (min_common_depth == -1 || !numa_enabled) + return; + + for_each_online_node(node) { + unsigned long i; + + printk(KERN_INFO "Node %d Memory:", node); + + count = 0; + + for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { + if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { + if (count == 0) + printk(" 0x%lx", i); + ++count; + } else { + if (count > 0) + printk("-0x%lx", i); + count = 0; + } + } + + if (count > 0) + printk("-0x%lx", i); + printk("\n"); + } + return; +} + +/* + * Allocate some memory, satisfying the lmb or bootmem allocator where + * required. nid is the preferred node and end is the physical address of + * the highest address in the node. + * + * Returns the physical address of the memory. + */ +static unsigned long careful_allocation(int nid, unsigned long size, + unsigned long align, unsigned long end) +{ + unsigned long ret = lmb_alloc_base(size, align, end); + + /* retry over all memory */ + if (!ret) + ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + /* + * If the memory came from a previously allocated node, we must + * retry with the bootmem allocator. + */ + if (pa_to_nid(ret) < nid) { + nid = pa_to_nid(ret); + ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), + size, align, 0); + + if (!ret) + panic("numa.c: cannot allocate %lu bytes on node %d", + size, nid); + + ret = virt_to_abs(ret); + + dbg("alloc_bootmem %lx %lx\n", ret, size); + } + + return ret; +} + +void __init do_init_bootmem(void) +{ + int nid; + int addr_cells, size_cells; + struct device_node *memory = NULL; + static struct notifier_block ppc64_numa_nb = { + .notifier_call = cpu_numa_callback, + .priority = 1 /* Must run before sched domains notifier. */ + }; + + min_low_pfn = 0; + max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + if (parse_numa_properties()) + setup_nonnuma(); + else + dump_numa_topology(); + + register_cpu_notifier(&ppc64_numa_nb); + + for_each_online_node(nid) { + unsigned long start_paddr, end_paddr; + int i; + unsigned long bootmem_paddr; + unsigned long bootmap_pages; + + start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; + end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; + + /* Allocate the node structure node local if possible */ + NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, + sizeof(struct pglist_data), + SMP_CACHE_BYTES, end_paddr); + NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + dbg("node %d\n", nid); + dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); + + NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->node_start_pfn = + init_node_data[nid].node_start_pfn; + NODE_DATA(nid)->node_spanned_pages = + end_paddr - start_paddr; + + if (NODE_DATA(nid)->node_spanned_pages == 0) + continue; + + dbg("start_paddr = %lx\n", start_paddr); + dbg("end_paddr = %lx\n", end_paddr); + + bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); + + bootmem_paddr = careful_allocation(nid, + bootmap_pages << PAGE_SHIFT, + PAGE_SIZE, end_paddr); + memset(abs_to_virt(bootmem_paddr), 0, + bootmap_pages << PAGE_SHIFT); + dbg("bootmap_paddr = %lx\n", bootmem_paddr); + + init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, + start_paddr >> PAGE_SHIFT, + end_paddr >> PAGE_SHIFT); + + /* + * We need to do another scan of all memory sections to + * associate memory with the correct node. 
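careful_allocation() above tries a node-local allocation below end first, then retries over all of DRAM before panicking (the bootmem retry for spills into an earlier node is elided here). A sketch of that cascade over a toy bump allocator, fake_alloc_base(), standing in for lmb_alloc_base():

#include <stdio.h>
#include <stdlib.h>

/* Toy bump allocator standing in for lmb_alloc_base() in this sketch. */
static unsigned long next_free = 0x1000;

static unsigned long fake_alloc_base(unsigned long size, unsigned long align,
                                     unsigned long max_addr)
{
        unsigned long base = (next_free + align - 1) & ~(align - 1);

        if (base + size > max_addr)
                return 0;
        next_free = base + size;
        return base;
}

/* The fallback cascade of careful_allocation(): prefer memory below the
 * node's end address, then retry over all of DRAM, then give up. */
static unsigned long careful_alloc(int nid, unsigned long size,
                                   unsigned long align, unsigned long node_end,
                                   unsigned long end_of_dram)
{
        unsigned long ret = fake_alloc_base(size, align, node_end);

        if (!ret)
                ret = fake_alloc_base(size, align, end_of_dram);
        if (!ret) {
                fprintf(stderr, "cannot allocate %lu bytes on node %d\n",
                        size, nid);
                abort();
        }
        return ret;
}

int main(void)
{
        /* First call fits below the node end; second spills past it
         * and succeeds only on the retry over all of DRAM. */
        printf("%#lx\n", careful_alloc(0, 0x1000, 0x100, 0x2000, 0x10000));
        printf("%#lx\n", careful_alloc(0, 0x1000, 0x100, 0x2000, 0x10000));
        return 0;
}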
+ */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + if (mem_size) { + dbg("free_bootmem %lx %lx\n", mem_start, mem_size); + free_bootmem_node(NODE_DATA(nid), mem_start, mem_size); + } + + if (--ranges) /* process all ranges in cell */ + goto new_range; + } + + /* + * Mark reserved regions on this node + */ + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long physbase = lmb.reserved.region[i].base; + unsigned long size = lmb.reserved.region[i].size; + + if (pa_to_nid(physbase) != nid && + pa_to_nid(physbase+size-1) != nid) + continue; + + if (physbase < end_paddr && + (physbase+size) > start_paddr) { + /* overlaps */ + if (physbase < start_paddr) { + size -= start_paddr - physbase; + physbase = start_paddr; + } + + if (size > end_paddr - physbase) + size = end_paddr - physbase; + + dbg("reserve_bootmem %lx %lx\n", physbase, + size); + reserve_bootmem_node(NODE_DATA(nid), physbase, + size); + } + } + /* + * This loop may look familiar, but we have to do it again + * after marking our reserved memory to mark memory present + * for sparsemem.
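The reserved-region loop above clips each LMB reservation to the node's physical span before handing it to reserve_bootmem_node(). The clipping logic in isolation, with illustrative addresses:

#include <stdio.h>

/* Clip a reserved region [physbase, physbase+size) to a node's span
 * [start, end), as the reserve_bootmem_node() loop above does.
 * Returns 0 when there is no overlap at all. */
static int clip_to_node(unsigned long *physbase, unsigned long *size,
                        unsigned long start, unsigned long end)
{
        if (*physbase >= end || *physbase + *size <= start)
                return 0;                       /* no overlap */

        if (*physbase < start) {
                *size -= start - *physbase;
                *physbase = start;
        }
        if (*size > end - *physbase)
                *size = end - *physbase;
        return 1;
}

int main(void)
{
        unsigned long base = 0x0f000000, size = 0x02000000;

        if (clip_to_node(&base, &size, 0x10000000, 0x20000000))
                printf("reserve %#lx..%#lx\n", base, base + size);
        return 0;
}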
+ */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range2: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + memory_present(numa_domain, mem_start >> PAGE_SHIFT, + (mem_start + mem_size) >> PAGE_SHIFT); + + if (--ranges) /* process all ranges in cell */ + goto new_range2; + } + + } +} + +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + int nid; + + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + for_each_online_node(nid) { + unsigned long start_pfn; + unsigned long end_pfn; + + start_pfn = init_node_data[nid].node_start_pfn; + end_pfn = init_node_data[nid].node_end_pfn; + + zones_size[ZONE_DMA] = end_pfn - start_pfn; + zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - + init_node_data[nid].node_present_pages; + + dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, + zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); + + free_area_init_node(nid, NODE_DATA(nid), zones_size, + start_pfn, zholes_size); + } +} + +static int __init early_numa(char *p) +{ + if (!p) + return 0; + + if (strstr(p, "off")) + numa_enabled = 0; + + if (strstr(p, "debug")) + numa_debug = 1; + + return 0; +} +early_param("numa", early_numa); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 724f97e5dee..484d24f9208 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -67,30 +67,9 @@ #include #include -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. 
-#endif - -int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - #ifdef CONFIG_PPC_ISERIES void __iomem *ioremap(unsigned long addr, unsigned long size) @@ -355,3 +334,16 @@ int iounmap_explicit(volatile void __iomem *start, unsigned long size) EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c new file mode 100644 index 00000000000..0473953f6a3 --- /dev/null +++ b/arch/powerpc/mm/slb.c @@ -0,0 +1,158 @@ +/* + * PowerPC64 SLB support. + * + * Copyright (C) 2004 David Gibson , IBM + * Based on earlier code writteh by: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +extern void slb_allocate(unsigned long ea); + +static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) +{ + return (ea & ESID_MASK) | SLB_ESID_V | slot; +} + +static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) +{ + return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; +} + +static inline void create_slbe(unsigned long ea, unsigned long flags, + unsigned long entry) +{ + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(ea, flags)), + "r" (mk_esid_data(ea, entry)) + : "memory" ); +} + +static void slb_flush_and_rebolt(void) +{ + /* If you change this make sure you change SLB_NUM_BOLTED + * appropriately too. */ + unsigned long ksp_flags = SLB_VSID_KERNEL; + unsigned long ksp_esid_data; + + WARN_ON(!irqs_disabled()); + + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + ksp_flags |= SLB_VSID_L; + + ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); + if ((ksp_esid_data & ESID_MASK) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + + /* We need to do this all in asm, so we're sure we don't touch + * the stack between the slbia and rebolting it. */ + asm volatile("isync\n" + "slbia\n" + /* Slot 1 - first VMALLOC segment */ + "slbmte %0,%1\n" + /* Slot 2 - kernel stack */ + "slbmte %2,%3\n" + "isync" + :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), + "r"(mk_esid_data(VMALLOCBASE, 1)), + "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), + "r"(ksp_esid_data) + : "memory"); +} + +/* Flush all user entries from the segment table of the current processor. 
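mk_esid_data() and mk_vsid_data() above pack the two operands of slbmte: the ESID plus valid bit plus slot index, and the shifted VSID plus flags. A sketch of the packing; the mask, bit, and shift values here are assumptions for illustration, the real ones live in the mmu headers:

#include <stdio.h>

/* Illustrative field layout only. */
#define ESID_MASK       0xfffffffff0000000UL    /* top 36 bits: the ESID */
#define SLB_ESID_V      0x0000000008000000UL    /* valid bit */
#define SLB_VSID_SHIFT  12

static unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
        /* ESID from the address, the valid bit, and the slot index in
         * the low bits, which is what slbmte expects in RB. */
        return (ea & ESID_MASK) | SLB_ESID_V | slot;
}

static unsigned long mk_vsid_data(unsigned long vsid, unsigned long flags)
{
        /* VSID shifted into place plus protection/size flags (RS). */
        return (vsid << SLB_VSID_SHIFT) | flags;
}

int main(void)
{
        printf("rb=%#lx rs=%#lx\n",
               mk_esid_data(0xc000000000000000UL, 2),
               mk_vsid_data(0x123456UL, 0x400));
        return 0;
}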
*/ +void switch_slb(struct task_struct *tsk, struct mm_struct *mm) +{ + unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long esid_data = 0; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + if (offset <= SLB_CACHE_ENTRIES) { + int i; + asm volatile("isync" : : : "memory"); + for (i = 0; i < offset; i++) { + esid_data = ((unsigned long)get_paca()->slb_cache[i] + << SID_SHIFT) | SLBIE_C; + asm volatile("slbie %0" : : "r" (esid_data)); + } + asm volatile("isync" : : : "memory"); + } else { + slb_flush_and_rebolt(); + } + + /* Workaround POWER5 < DD2.1 issue */ + if (offset == 1 || offset > SLB_CACHE_ENTRIES) + asm volatile("slbie %0" : : "r" (esid_data)); + + get_paca()->slb_cache_ptr = 0; + get_paca()->context = mm->context; + + /* + * preload some userspace segments into the SLB. + */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + if (pc >= KERNELBASE) + return; + slb_allocate(pc); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + if (stack >= KERNELBASE) + return; + slb_allocate(stack); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + if (unmapped_base >= KERNELBASE) + return; + slb_allocate(unmapped_base); +} + +void slb_initialize(void) +{ + /* On iSeries the bolted entries have already been set up by + * the hypervisor from the lparMap data in head.S */ +#ifndef CONFIG_PPC_ISERIES + unsigned long flags = SLB_VSID_KERNEL; + + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + if (cpu_has_feature(CPU_FTR_16M_PAGE)) + flags |= SLB_VSID_L; + + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + asm volatile("isync; slbia; isync":::"memory"); + create_slbe(KERNELBASE, flags, 0); + create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); + /* We don't bolt the stack for the time being - we're in boot, + * so the stack is in the bolted segment. By the time it goes + * elsewhere, we'll call _switch() which will bolt in the new + * one. */ + asm volatile("isync":::"memory"); +#endif + + get_paca()->stab_rr = SLB_NUM_BOLTED; +} diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S new file mode 100644 index 00000000000..a3a03da503b --- /dev/null +++ b/arch/powerpc/mm/slb_low.S @@ -0,0 +1,151 @@ +/* + * arch/ppc64/mm/slb_low.S + * + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson , IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * First find a slot, round robin. Previously we tried to find + * a free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. 
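switch_slb() above invalidates user segments one slbie at a time while the PACA cache is valid, and falls back to a full slbia plus rebolt once the cache has overflowed. The policy in outline, with printf stand-ins for the privileged instructions, an assumed cache size, and the low bit modelling SLBIE_C:

#include <stdio.h>

#define SLB_CACHE_ENTRIES 8     /* assumed to match the PACA cache size */
#define SID_SHIFT 28

/* Stand-ins for the privileged slbie/slbia instructions. */
static void slbie(unsigned long esid_data) { printf("slbie %#lx\n", esid_data); }
static void flush_and_rebolt(void)         { printf("slbia + rebolt\n"); }

/* The context-switch policy of switch_slb(): if the cache did not
 * overflow, invalidate only the cached user segments; otherwise flush
 * everything and re-create the bolted entries. */
static void switch_slb_sketch(const unsigned short *cache, unsigned long ptr)
{
        if (ptr <= SLB_CACHE_ENTRIES) {
                unsigned long i;
                for (i = 0; i < ptr; i++)
                        slbie(((unsigned long)cache[i] << SID_SHIFT) | 1);
        } else {
                flush_and_rebolt();
        }
}

int main(void)
{
        unsigned short cache[SLB_CACHE_ENTRIES] = { 0, 3, 7 };

        switch_slb_sketch(cache, 3);                     /* targeted slbie */
        switch_slb_sketch(cache, SLB_CACHE_ENTRIES + 1); /* overflowed: full flush */
        return 0;
}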
+ */ +#ifdef CONFIG_PPC_ISERIES + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,28 + clrrdi r11,r3,28 + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r11 + beq 3f +#endif /* CONFIG_PPC_ISERIES */ + + ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* use a cpu feature mask if we ever change our slb size */ + cmpldi r10,SLB_NUM_ENTRIES + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) +3: + /* r3 = faulting address, r10 = entry */ + + srdi r9,r3,60 /* get region */ + srdi r3,r3,28 /* get esid */ + cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + + rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ + oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ + + /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ + + blt cr7,0f /* user or kernel? */ + + /* kernel address: proto-VSID = ESID */ + /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but + * this code will generate the protoVSID 0xfffffffff for the + * top segment. That's ok, the scramble below will translate + * it to VSID 0, which is reserved as a bad VSID - one which + * will never have any pages in it. */ + li r11,SLB_VSID_KERNEL +BEGIN_FTR_SECTION + bne cr7,9f + li r11,(SLB_VSID_KERNEL|SLB_VSID_L) +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) + b 9f + +0: /* user address: proto-VSID = context<<15 | ESID */ + srdi. r9,r3,USER_ESID_BITS + bne- 8f /* invalid ea bits set */ + +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + lhz r9,PACAHIGHHTLBAREAS(r13) + srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) + srd r9,r9,r11 + lhz r11,PACALOWHTLBAREAS(r13) + srd r11,r11,r3 + or r9,r9,r11 +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + + li r11,SLB_VSID_USER + +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ +END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) +#endif /* CONFIG_HUGETLB_PAGE */ + + ld r9,PACACONTEXTID(r13) + rldimi r3,r9,USER_ESID_BITS,0 + +9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ + ASM_VSID_SCRAMBLE(r3,r9) + + rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r11,r10 + + bgelr cr7 /* we're done for kernel addresses */ + + /* Update the slb cache */ + lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r3,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ + rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ + add r11,r11,r13 /* r11 = (u16 *)paca + offset */ + sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r3,r3,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r3,SLB_CACHE_ENTRIES+1 +2: + sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + blr + +8: /* invalid EA */ + li r3,0 /* BAD_VSID */ + li r11,SLB_VSID_USER /* flags don't much matter */ + b 9b diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c new file mode 100644 index 00000000000..1b83f002bf2 --- /dev/null +++ b/arch/powerpc/mm/stab.c @@ -0,0 +1,279 @@ +/* + * PowerPC64 Segment Translation Support. 
+ * + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct stab_entry { + unsigned long esid_data; + unsigned long vsid_data; +}; + +/* Both the segment table and SLB code uses the following cache */ +#define NR_STAB_CACHE_ENTRIES 8 +DEFINE_PER_CPU(long, stab_cache_ptr); +DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); + +/* + * Create a segment table entry for the given esid/vsid pair. + */ +static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) +{ + unsigned long esid_data, vsid_data; + unsigned long entry, group, old_esid, castout_entry, i; + unsigned int global_entry; + struct stab_entry *ste, *castout_ste; + unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + + vsid_data = vsid << STE_VSID_SHIFT; + esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + if (! kernel_segment) + esid_data |= STE_ESID_KS; + + /* Search the primary group first. */ + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + + /* Find an empty entry, if one exists. */ + for (group = 0; group < 2; group++) { + for (entry = 0; entry < 8; entry++, ste++) { + if (!(ste->esid_data & STE_ESID_V)) { + ste->vsid_data = vsid_data; + asm volatile("eieio":::"memory"); + ste->esid_data = esid_data; + return (global_entry | entry); + } + } + /* Now search the secondary group. */ + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + } + + /* + * Could not find empty entry, pick one with a round robin selection. + * Search all entries in the two groups. + */ + castout_entry = get_paca()->stab_rr; + for (i = 0; i < 16; i++) { + if (castout_entry < 8) { + global_entry = (esid & 0x1f) << 3; + ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); + castout_ste = ste + castout_entry; + } else { + global_entry = ((~esid) & 0x1f) << 3; + ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); + castout_ste = ste + (castout_entry - 8); + } + + /* Dont cast out the first kernel segment */ + if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + break; + + castout_entry = (castout_entry + 1) & 0xf; + } + + get_paca()->stab_rr = (castout_entry + 1) & 0xf; + + /* Modify the old entry to the new value. */ + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + old_esid = castout_ste->esid_data >> SID_SHIFT; + castout_ste->esid_data = 0; /* Invalidate old entry */ + + asm volatile("sync" : : : "memory"); /* Order update */ + + castout_ste->vsid_data = vsid_data; + asm volatile("eieio" : : : "memory"); /* Order update */ + castout_ste->esid_data = esid_data; + + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + /* Ensure completion of slbie */ + asm volatile("sync" : : : "memory"); + + return (global_entry | (castout_entry & 0x7)); +} + +/* + * Allocate a segment table entry for the given ea and mm + */ +static int __ste_allocate(unsigned long ea, struct mm_struct *mm) +{ + unsigned long vsid; + unsigned char stab_entry; + unsigned long offset; + + /* Kernel or user address? 
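make_ste() above probes a primary group of eight entries indexed by the low five ESID bits, then a secondary group indexed by their complement, before falling back to round-robin castout. The group addressing as plain arithmetic (entry indices here, not the byte offsets the real code forms):

#include <stdio.h>

/* Group addressing used by make_ste(): 32 groups of 8 entries. The
 * primary group index comes from the low 5 ESID bits, the secondary
 * from their complement. */
static unsigned int primary_group(unsigned long esid)
{
        return (esid & 0x1f) << 3;
}

static unsigned int secondary_group(unsigned long esid)
{
        return ((~esid) & 0x1f) << 3;
}

int main(void)
{
        unsigned long esid = 0x2a;

        printf("primary entries %u..%u, secondary %u..%u\n",
               primary_group(esid), primary_group(esid) + 7,
               secondary_group(esid), secondary_group(esid) + 7);
        return 0;
}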
*/ + if (ea >= KERNELBASE) { + vsid = get_kernel_vsid(ea); + } else { + if ((ea >= TASK_SIZE_USER64) || (! mm)) + return 1; + + vsid = get_vsid(mm->context.id, ea); + } + + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + + if (ea < KERNELBASE) { + offset = __get_cpu_var(stab_cache_ptr); + if (offset < NR_STAB_CACHE_ENTRIES) + __get_cpu_var(stab_cache[offset++]) = stab_entry; + else + offset = NR_STAB_CACHE_ENTRIES+1; + __get_cpu_var(stab_cache_ptr) = offset; + + /* Order update */ + asm volatile("sync":::"memory"); + } + + return 0; +} + +int ste_allocate(unsigned long ea) +{ + return __ste_allocate(ea, current->mm); +} + +/* + * Do the segment table work for a context switch: flush all user + * entries from the table, then preload some probably useful entries + * for the new task + */ +void switch_stab(struct task_struct *tsk, struct mm_struct *mm) +{ + struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; + struct stab_entry *ste; + unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long unmapped_base; + + /* Force previous translations to complete. DRENG */ + asm volatile("isync" : : : "memory"); + + if (offset <= NR_STAB_CACHE_ENTRIES) { + int i; + + for (i = 0; i < offset; i++) { + ste = stab + __get_cpu_var(stab_cache[i]); + ste->esid_data = 0; /* invalidate entry */ + } + } else { + unsigned long entry; + + /* Invalidate all entries. */ + ste = stab; + + /* Never flush the first entry. */ + ste += 1; + for (entry = 1; + entry < (PAGE_SIZE / sizeof(struct stab_entry)); + entry++, ste++) { + unsigned long ea; + ea = ste->esid_data & ESID_MASK; + if (ea < KERNELBASE) { + ste->esid_data = 0; + } + } + } + + asm volatile("sync; slbia; sync":::"memory"); + + __get_cpu_var(stab_cache_ptr) = 0; + + /* Now preload some entries for the new task */ + if (test_tsk_thread_flag(tsk, TIF_32BIT)) + unmapped_base = TASK_UNMAPPED_BASE_USER32; + else + unmapped_base = TASK_UNMAPPED_BASE_USER64; + + __ste_allocate(pc, mm); + + if (GET_ESID(pc) == GET_ESID(stack)) + return; + + __ste_allocate(stack, mm); + + if ((GET_ESID(pc) == GET_ESID(unmapped_base)) + || (GET_ESID(stack) == GET_ESID(unmapped_base))) + return; + + __ste_allocate(unmapped_base, mm); + + /* Order update */ + asm volatile("sync" : : : "memory"); +} + +extern void slb_initialize(void); + +/* + * Allocate segment tables for secondary CPUs. These must all go in + * the first (bolted) segment, so that do_stab_bolted won't get a + * recursive segment miss on the segment table itself. + */ +void stabs_alloc(void) +{ + int cpu; + + if (cpu_has_feature(CPU_FTR_SLB)) + return; + + for_each_cpu(cpu) { + unsigned long newstab; + + if (cpu == 0) + continue; /* stab for CPU 0 is statically allocated */ + + newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1< + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); + +/* This is declared as we are using the more or less generic + * include/asm-ppc64/tlb.h file -- tgall + */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +struct pte_freelist_batch +{ + struct rcu_head rcu; + unsigned int index; + pgtable_free_t tables[0]; +}; + +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +#define PTE_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ + / sizeof(pgtable_free_t)) + +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +static void pgtable_free_now(pgtable_free_t pgf) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pgtable_free(pgf); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pgtable_free(batch->tables[i]); + + free_page((unsigned long)batch); +} + +static void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) +{ + /* This is safe as we are holding page_table_lock */ + cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { + pgtable_free(pgf); + return; + } + + if (*batchp == NULL) { + *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); + if (*batchp == NULL) { + pgtable_free_now(pgf); + return; + } + (*batchp)->index = 0; + } + (*batchp)->tables[(*batchp)->index++] = pgf; + if ((*batchp)->index == PTE_FREELIST_SIZE) { + pte_free_submit(*batchp); + *batchp = NULL; + } +} + +/* + * Update the MMU hash table to correspond with a change to + * a Linux PTE. If wrprot is true, it is permissible to + * change the existing HPTE to read-only rather than removing it + * (if we remove it we should clear the _PTE_HPTEFLAGS bits). + */ +void hpte_update(struct mm_struct *mm, unsigned long addr, + unsigned long pte, int wrprot) +{ + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + unsigned long vsid; + int i; + + i = batch->index; + + /* + * This can happen when we are in the middle of a TLB batch and + * we encounter memory pressure (eg copy_page_range when it tries + * to allocate a new pte). If we have to reclaim memory and end + * up scanning and resetting referenced bits then our batch context + * will change mid stream. 
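pgtable_free_tlb() above defers page-table frees into a per-cpu batch and submits the whole batch through RCU, so no other CPU can still be walking a table when it is finally freed. The batching policy alone, with a direct free() standing in for the RCU callback and an assumed batch size:

#include <stdio.h>
#include <stdlib.h>

#define BATCH_SIZE 4    /* the kernel sizes this to fill one page */

/* A queued batch of page tables awaiting a safe point to free. */
struct batch {
        unsigned int index;
        void *tables[BATCH_SIZE];
};

/* In the kernel this runs as an RCU callback; here we free directly. */
static void submit(struct batch *b)
{
        unsigned int i;

        for (i = 0; i < b->index; i++)
                free(b->tables[i]);     /* pgtable_free() in the kernel */
        b->index = 0;
}

/* Queue one free and submit the batch once it fills up,
 * mirroring pgtable_free_tlb(). */
static void queue_free(struct batch *b, void *table)
{
        b->tables[b->index++] = table;
        if (b->index == BATCH_SIZE)
                submit(b);
}

int main(void)
{
        struct batch b = { 0 };
        int i;

        for (i = 0; i < 10; i++)
                queue_free(&b, malloc(64));
        submit(&b);             /* pte_free_finish(): drain the remainder */
        return 0;
}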
+ */ + if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { + flush_tlb_pending(); + i = 0; + } + if (i == 0) { + batch->mm = mm; + batch->large = pte_huge(pte); + } + if (addr < KERNELBASE) { + vsid = get_vsid(mm->context.id, addr); + WARN_ON(vsid == 0); + } else + vsid = get_kernel_vsid(addr); + batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); + batch->pte[i] = __pte(pte); + batch->index = ++i; + if (i >= PPC64_TLB_BATCH_NR) + flush_tlb_pending(); +} + +void __flush_tlb_pending(struct ppc64_tlb_batch *batch) +{ + int i; + int cpu; + cpumask_t tmp; + int local = 0; + + BUG_ON(in_interrupt()); + + cpu = get_cpu(); + i = batch->index; + tmp = cpumask_of_cpu(cpu); + if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) + local = 1; + + if (i == 1) + flush_hash_page(batch->vaddr[0], batch->pte[0], local); + else + flush_hash_range(i, local); + batch->index = 0; + put_cpu(); +} + +void pte_free_finish(void) +{ + /* This is safe as we are holding page_table_lock */ + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (*batchp == NULL) + return; + pte_free_submit(*batchp); + *batchp = NULL; +} diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index fa889204d6a..4a9928ef303 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -83,7 +83,7 @@ head-y := arch/ppc64/kernel/head.o libs-y += arch/ppc64/lib/ core-y += arch/ppc64/kernel/ arch/powerpc/kernel/ -core-y += arch/ppc64/mm/ +core-y += arch/powerpc/mm/ core-y += arch/powerpc/platforms/ core-$(CONFIG_XMON) += arch/ppc64/xmon/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile deleted file mode 100644 index 3695d00d347..00000000000 --- a/arch/ppc64/mm/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# -# Makefile for the linux ppc-specific parts of the memory manager. -# - -EXTRA_CFLAGS += -mno-minimal-toc - -obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ - slb_low.o slb.o stab.o mmap.o -obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c deleted file mode 100644 index be3f25cf3e9..00000000000 --- a/arch/ppc64/mm/fault.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * arch/ppc/mm/fault.c - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/i386/mm/fault.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Modified by Cort Dougan and Paul Mackerras. - * - * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Check whether the instruction at regs->nip is a store using - * an update addressing form which will update r1. 
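__flush_tlb_pending() above flushes locally when the mm has only ever run on the current CPU, and globally otherwise. The decision as a predicate, with a plain bitmask standing in for cpumask_t:

#include <stdio.h>

/* Decide between a local and a global TLB flush the way
 * __flush_tlb_pending() does: if the mm's cpu mask contains only the
 * current CPU, a cheaper local invalidation suffices. */
static int flush_is_local(unsigned long mm_cpu_mask, int this_cpu)
{
        return mm_cpu_mask == (1UL << this_cpu);
}

int main(void)
{
        printf("%d\n", flush_is_local(1UL << 2, 2));            /* 1: local */
        printf("%d\n", flush_is_local((1UL << 2) | 1UL, 2));    /* 0: global */
        return 0;
}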
- */ -static int store_updates_sp(struct pt_regs *regs) -{ - unsigned int inst; - - if (get_user(inst, (unsigned int __user *)regs->nip)) - return 0; - /* check for 1 in the rA field */ - if (((inst >> 16) & 0x1f) != 1) - return 0; - /* check major opcode */ - switch (inst >> 26) { - case 37: /* stwu */ - case 39: /* stbu */ - case 45: /* sthu */ - case 53: /* stfsu */ - case 55: /* stfdu */ - return 1; - case 62: /* std or stdu */ - return (inst & 3) == 1; - case 31: - /* check minor opcode */ - switch ((inst >> 1) & 0x3ff) { - case 181: /* stdux */ - case 183: /* stwux */ - case 247: /* stbux */ - case 439: /* sthux */ - case 695: /* stfsux */ - case 759: /* stfdux */ - return 1; - } - } - return 0; -} - -static void do_dabr(struct pt_regs *regs, unsigned long error_code) -{ - siginfo_t info; - - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return; - - if (debugger_dabr_match(regs)) - return; - - /* Clear the DABR */ - set_dabr(0); - - /* Deliver the signal to userspace */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)regs->nip; - force_sig_info(SIGTRAP, &info, current); -} - -/* - * The error_code parameter is - * - DSISR for a non-SLB data access fault, - * - SRR1 & 0x08000000 for a non-SLB instruction access fault - * - 0 any SLB fault. - * The return value is 0 if the fault was handled, or the signal - * number if this is a kernel fault that can't be handled here. - */ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - struct vm_area_struct * vma; - struct mm_struct *mm = current->mm; - siginfo_t info; - unsigned long code = SEGV_MAPERR; - unsigned long is_write = error_code & DSISR_ISSTORE; - unsigned long trap = TRAP(regs); - unsigned long is_exec = trap == 0x400; - - BUG_ON((trap == 0x380) || (trap == 0x480)); - - if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return 0; - - if (trap == 0x300) { - if (debugger_fault_handler(regs)) - return 0; - } - - /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) - return SIGSEGV; - - if (error_code & DSISR_DABRMATCH) { - do_dabr(regs, error_code); - return 0; - } - - if (in_atomic() || mm == NULL) { - if (!user_mode(regs)) - return SIGSEGV; - /* in_atomic() in user mode is really bad, - as is current->mm == NULL. */ - printk(KERN_EMERG "Page fault in user mode with" - "in_atomic() = %d mm = %p\n", in_atomic(), mm); - printk(KERN_EMERG "NIP = %lx MSR = %lx\n", - regs->nip, regs->msr); - die("Weird page fault", regs, SIGSEGV); - } - - /* When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an - * erroneous fault occuring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. - * - * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. 
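The deadlock-avoidance dance described in that comment, in outline: only fall back to the blocking read lock once the faulting kernel NIP is known to have an exception-table fixup, so a fault taken with mmap_sem already held cannot deadlock. pthread primitives stand in for the kernel's semaphore, and nip_has_fixup() is a hypothetical stand-in for search_exception_tables():

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Hypothetical stand-in for search_exception_tables(regs->nip). */
static int nip_has_fixup(unsigned long nip) { return nip != 0; }

/* The locking pattern from do_page_fault(): try the lock first; if
 * that fails, block only when the kernel fault site is known to be
 * one that validly touches user space. Returns 0 on success. */
static int lock_mm_for_fault(unsigned long nip, int user_mode)
{
        if (pthread_rwlock_tryrdlock(&mmap_sem) != 0) {
                if (!user_mode && !nip_has_fixup(nip))
                        return -1;      /* bad_area_nosemaphore */
                pthread_rwlock_rdlock(&mmap_sem);
        }
        return 0;
}

int main(void)
{
        if (lock_mm_for_fault(0x1234, 0) == 0) {
                puts("fault handled under mmap_sem");
                pthread_rwlock_unlock(&mmap_sem);
        }
        return 0;
}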
- */ - if (!down_read_trylock(&mm->mmap_sem)) { - if (!user_mode(regs) && !search_exception_tables(regs->nip)) - goto bad_area_nosemaphore; - - down_read(&mm->mmap_sem); - } - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - - if (vma->vm_start <= address) { - goto good_area; - } - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - - /* - * N.B. The POWER/Open ABI allows programs to access up to - * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB - * below the stack pointer (r1) before decrementing it. - * The exec code can write slightly over 640kB to the stack - * before setting the user r1. Thus we allow the stack to - * expand to 1MB without further checks. - */ - if (address + 0x100000 < vma->vm_end) { - /* get user regs even if this fault is in kernel mode */ - struct pt_regs *uregs = current->thread.regs; - if (uregs == NULL) - goto bad_area; - - /* - * A user-mode access to an address a long way below - * the stack pointer is only valid if the instruction - * is one which would update the stack pointer to the - * address accessed if the instruction completed, - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb - * (or the byte, halfword, float or double forms). - * - * If we don't check this then any write to the area - * between the last mapped region and the stack will - * expand the stack rather than segfaulting. - */ - if (address + 2048 < uregs->gpr[1] - && (!user_mode(regs) || !store_updates_sp(regs))) - goto bad_area; - } - - if (expand_stack(vma, address)) - goto bad_area; - -good_area: - code = SEGV_ACCERR; - - if (is_exec) { - /* protection fault */ - if (error_code & DSISR_PROTFAULT) - goto bad_area; - if (!(vma->vm_flags & VM_EXEC)) - goto bad_area; - /* a write */ - } else if (is_write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - /* a read */ - } else { - if (!(vma->vm_flags & VM_READ)) - goto bad_area; - } - - survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - switch (handle_mm_fault(mm, vma, address, is_write)) { - - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: - BUG(); - } - - up_read(&mm->mmap_sem); - return 0; - -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses cause a SIGSEGV */ - if (user_mode(regs)) { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void __user *) address; - force_sig_info(SIGSEGV, &info, current); - return 0; - } - - if (trap == 0x400 && (error_code & DSISR_PROTFAULT) - && printk_ratelimit()) - printk(KERN_CRIT "kernel tried to execute NX-protected" - " page (%lx) - exploit attempt? (uid: %d)\n", - address, current->uid); - - return SIGSEGV; - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. 
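The stack-growth checks above allow an access below the stack pointer if it falls within 1MB of the existing mapping, within 2048 bytes of r1 (covering the 288-byte ABI red zone and signal frames), or is made by a store-with-update instruction. The same predicate in isolation, with illustrative addresses:

#include <stdio.h>

/* The stack-expansion sanity checks from do_page_fault(), as a plain
 * predicate: any one of the three conditions permits growth. */
static int stack_expansion_ok(unsigned long address, unsigned long vm_end,
                              unsigned long r1, int store_updates_sp)
{
        if (address + 0x100000 >= vm_end)       /* within 1MB of the mapping */
                return 1;
        if (address + 2048 >= r1)               /* near the stack pointer */
                return 1;
        return store_updates_sp;                /* stwu/stdu & friends */
}

int main(void)
{
        unsigned long r1 = 0x7fff0000, vm_end = 0x90000000;

        printf("%d\n", stack_expansion_ok(r1 - 288, vm_end, r1, 0));   /* 1: red zone */
        printf("%d\n", stack_expansion_ok(r1 - 65536, vm_end, r1, 0)); /* 0: denied */
        return 0;
}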
- */ -out_of_memory: - up_read(&mm->mmap_sem); - if (current->pid == 1) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); - if (user_mode(regs)) - do_exit(SIGKILL); - return SIGKILL; - -do_sigbus: - up_read(&mm->mmap_sem); - if (user_mode(regs)) { - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return 0; - } - return SIGBUS; -} - -/* - * bad_page_fault is called when we have a bad access from the kernel. - * It is called from do_page_fault above and from some of the procedures - * in traps.c. - */ -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) -{ - const struct exception_table_entry *entry; - - /* Are we prepared to handle this fault? */ - if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; - return; - } - - /* kernel has accessed a bad area */ - die("Kernel access of bad area", regs, sig); -} diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S deleted file mode 100644 index ee5a5d36bfa..00000000000 --- a/arch/ppc64/mm/hash_low.S +++ /dev/null @@ -1,288 +0,0 @@ -/* - * ppc64 MMU hashtable management routines - * - * (c) Copyright IBM Corp. 2003 - * - * Maintained by: Benjamin Herrenschmidt - * - * - * This file is covered by the GNU Public Licence v2 as - * described in the kernel's COPYING file. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - .text - -/* - * Stackframe: - * - * +-> Back chain (SP + 256) - * | General register save area (SP + 112) - * | Parameter save area (SP + 48) - * | TOC save area (SP + 40) - * | link editor doubleword (SP + 32) - * | compiler doubleword (SP + 24) - * | LR save area (SP + 16) - * | CR save area (SP + 8) - * SP ---> +-- Back chain (SP + 0) - */ -#define STACKFRAMESIZE 256 - -/* Save parameters offsets */ -#define STK_PARM(i) (STACKFRAMESIZE + 48 + ((i)-3)*8) - -/* Save non-volatile offsets */ -#define STK_REG(i) (112 + ((i)-14)*8) - -/* - * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local) - * - * Adds a page to the hash table. This is the non-LPAR version for now - */ - -_GLOBAL(__hash_page) - mflr r0 - std r0,16(r1) - stdu r1,-STACKFRAMESIZE(r1) - /* Save all params that we need after a function call */ - std r6,STK_PARM(r6)(r1) - std r8,STK_PARM(r8)(r1) - - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - - /* Save non-volatile registers. - * r31 will hold "old PTE" - * r30 is "new PTE" - * r29 is "va" - * r28 is a hash value - * r27 is hashtab mask (maybe dynamic patched instead ?) - */ - std r27,STK_REG(r27)(r1) - std r28,STK_REG(r28)(r1) - std r29,STK_REG(r29)(r1) - std r30,STK_REG(r30)(r1) - std r31,STK_REG(r31)(r1) - - /* Step 1: - * - * Check permissions, atomically mark the linux PTE busy - * and hashed. - */ -1: - ldarx r31,0,r6 - /* Check access rights (access & ~(pte_val(*ptep))) */ - andc. r0,r4,r31 - bne- htab_wrong_access - /* Check if PTE is busy */ - andi. r0,r31,_PAGE_BUSY - /* If so, just bail out and refault if needed. Someone else - * is changing this PTE anyway and might hash it. 
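Step 1 of __hash_page above is an ldarx/stdcx. loop: check permissions, bail if the PTE is already marked busy, otherwise atomically mark it busy and hashed. The same claim expressed with a compiler compare-and-swap; the bit values are illustrative, not the real PTE layout:

#include <stdio.h>
#include <stdint.h>

#define _PAGE_BUSY      0x0004UL        /* illustrative bit values */
#define _PAGE_DIRTY     0x0080UL
#define _PAGE_ACCESSED  0x0100UL
#define _PAGE_HASHPTE   0x0400UL

/* The ldarx/stdcx. loop from __hash_page step 1: bail if the PTE is
 * already busy, otherwise claim it by atomically setting
 * BUSY|ACCESSED|HASHPTE. Returns 1 when we own the PTE. */
static int claim_pte(uint64_t *ptep)
{
        uint64_t old, newv;

        do {
                old = __atomic_load_n(ptep, __ATOMIC_RELAXED);
                if (old & _PAGE_BUSY)
                        return 0;       /* someone else is hashing it */
                newv = old | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE;
        } while (!__atomic_compare_exchange_n(ptep, &old, newv, 0,
                                              __ATOMIC_ACQUIRE,
                                              __ATOMIC_RELAXED));
        return 1;
}

int main(void)
{
        uint64_t pte = _PAGE_DIRTY;

        printf("claimed=%d pte=%#llx\n", claim_pte(&pte),
               (unsigned long long)pte);
        return 0;
}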
- */ - bne- bail_ok - /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY,HASHPTE and ACCESSED) - */ - rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ - or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE - /* Write the linux PTE atomically (setting busy) */ - stdcx. r30,0,r6 - bne- 1b - isync - - /* Step 2: - * - * Insert/Update the HPTE in the hash table. At this point, - * r4 (access) is re-useable, we use it for the new HPTE flags - */ - - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 - rldicl r3,r3,0,36 - or r29,r3,r29 - - /* Calculate hash value for primary slot and store it in r28 */ - rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ - rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ - xor r28,r5,r0 - - /* Convert linux PTE bits into HW equivalents */ - andi. r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ - rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ - rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ - andc r0,r30,r0 /* r0 = pte & ~r0 */ - rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - - /* We eventually do the icache sync here (maybe inline that - * code rather than call a C function...) - */ -BEGIN_FTR_SECTION - mr r4,r30 - mr r5,r7 - bl .hash_page_do_lazy_icache -END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) - - /* At this point, r3 contains new PP bits, save them in - * place of "access" in the param area (sic) - */ - std r3,STK_PARM(r4)(r1) - - /* Get htab_hash_mask */ - ld r4,htab_hash_mask@got(2) - ld r27,0(r4) /* htab_hash_mask -> r27 */ - - /* Check if we may already be in the hashtable, in this case, we - * go to out-of-line code to try to modify the HPTE - */ - andi. r0,r31,_PAGE_HASHPTE - bne htab_modify_pte - -htab_insert_pte: - /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HASHPTE - */ - lis r0,_PAGE_HPTEFLAGS@h - ori r0,r0,_PAGE_HPTEFLAGS@l - andc r30,r30,r0 - ori r30,r30,_PAGE_HASHPTE - - /* page number in r5 */ - rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT - - /* Calculate primary group hash */ - and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ - mr r4,r29 /* Retreive va */ - li r6,0 /* no vflags */ -_GLOBAL(htab_call_hpte_insert1) - bl . /* Will be patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Now try secondary slot */ - - /* page number in r5 */ - rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT - - /* Calculate secondary group hash */ - andc r0,r27,r28 - rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ - - /* Call ppc_md.hpte_insert */ - ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ - mr r4,r29 /* Retreive va */ - li r6,HPTE_V_SECONDARY@l /* secondary slot */ -_GLOBAL(htab_call_hpte_insert2) - bl . /* Will be patched by htab_finish_init() */ - cmpdi 0,r3,0 - bge+ htab_pte_insert_ok /* Insertion successful */ - cmpdi 0,r3,-2 /* Critical failure */ - beq- htab_pte_insert_failure - - /* Both are full, we need to evict something */ - mftb r0 - /* Pick a random group based on TB */ - andi. r0,r0,1 - mr r5,r28 - bne 2f - not r5,r5 -2: and r0,r5,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - /* Call ppc_md.hpte_remove */ -_GLOBAL(htab_call_hpte_remove) - bl . 
/* Will be patched by htab_finish_init() */ - - /* Try all again */ - b htab_insert_pte - -bail_ok: - li r3,0 - b bail - -htab_pte_insert_ok: - /* Insert slot number & secondary bit in PTE */ - rldimi r30,r3,12,63-15 - - /* Write out the PTE with a normal write - * (maybe add eieio may be good still ?) - */ -htab_write_out_pte: - ld r6,STK_PARM(r6)(r1) - std r30,0(r6) - li r3, 0 -bail: - ld r27,STK_REG(r27)(r1) - ld r28,STK_REG(r28)(r1) - ld r29,STK_REG(r29)(r1) - ld r30,STK_REG(r30)(r1) - ld r31,STK_REG(r31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) - mtlr r0 - blr - -htab_modify_pte: - /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ - mr r4,r3 - rlwinm r3,r31,32-12,29,31 - - /* Secondary group ? if yes, get a inverted hash value */ - mr r5,r28 - andi. r0,r31,_PAGE_SECONDARY - beq 1f - not r5,r5 -1: - /* Calculate proper slot value for ppc_md.hpte_updatepp */ - and r0,r5,r27 - rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ - add r3,r0,r3 /* add slot idx */ - - /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* va */ - li r6,0 /* large is 0 */ - ld r7,STK_PARM(r8)(r1) /* get "local" param */ -_GLOBAL(htab_call_hpte_updatepp) - bl . /* Will be patched by htab_finish_init() */ - - /* if we failed because typically the HPTE wasn't really here - * we try an insertion. - */ - cmpdi 0,r3,-1 - beq- htab_insert_pte - - /* Clear the BUSY bit and Write out the PTE */ - li r0,_PAGE_BUSY - andc r30,r30,r0 - b htab_write_out_pte - -htab_wrong_access: - /* Bail out clearing reservation */ - stdcx. r31,0,r6 - li r3,1 - b bail - -htab_pte_insert_failure: - /* Bail out restoring old PTE */ - ld r6,STK_PARM(r6)(r1) - std r31,0(r6) - li r3,-1 - b bail - - diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c deleted file mode 100644 index 174d14576c2..00000000000 --- a/arch/ppc64/mm/hash_native.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * native hashtable management. - * - * SMP scalability work: - * Copyright (C) 2001 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define HPTE_LOCK_BIT 3 - -static DEFINE_SPINLOCK(native_tlbie_lock); - -static inline void native_lock_hpte(hpte_t *hptep) -{ - unsigned long *word = &hptep->v; - - while (1) { - if (!test_and_set_bit(HPTE_LOCK_BIT, word)) - break; - while(test_bit(HPTE_LOCK_BIT, word)) - cpu_relax(); - } -} - -static inline void native_unlock_hpte(hpte_t *hptep) -{ - unsigned long *word = &hptep->v; - - asm volatile("lwsync":::"memory"); - clear_bit(HPTE_LOCK_BIT, word); -} - -long native_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, unsigned long vflags, - unsigned long rflags) -{ - hpte_t *hptep = htab_address + hpte_group; - unsigned long hpte_v, hpte_r; - int i; - - for (i = 0; i < HPTES_PER_GROUP; i++) { - if (! (hptep->v & HPTE_V_VALID)) { - /* retry with lock held */ - native_lock_hpte(hptep); - if (! 
(hptep->v & HPTE_V_VALID)) - break; - native_unlock_hpte(hptep); - } - - hptep++; - } - - if (i == HPTES_PER_GROUP) - return -1; - - hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - if (vflags & HPTE_V_LARGE) - va &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; - - hptep->r = hpte_r; - /* Guarantee the second dword is visible before the valid bit */ - __asm__ __volatile__ ("eieio" : : : "memory"); - /* - * Now set the first dword including the valid bit - * NOTE: this also unlocks the hpte - */ - hptep->v = hpte_v; - - __asm__ __volatile__ ("ptesync" : : : "memory"); - - return i | (!!(vflags & HPTE_V_SECONDARY) << 3); -} - -static long native_hpte_remove(unsigned long hpte_group) -{ - hpte_t *hptep; - int i; - int slot_offset; - unsigned long hpte_v; - - /* pick a random entry to start at */ - slot_offset = mftb() & 0x7; - - for (i = 0; i < HPTES_PER_GROUP; i++) { - hptep = htab_address + hpte_group + slot_offset; - hpte_v = hptep->v; - - if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) { - /* retry with lock held */ - native_lock_hpte(hptep); - hpte_v = hptep->v; - if ((hpte_v & HPTE_V_VALID) - && !(hpte_v & HPTE_V_BOLTED)) - break; - native_unlock_hpte(hptep); - } - - slot_offset++; - slot_offset &= 0x7; - } - - if (i == HPTES_PER_GROUP) - return -1; - - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; - - return i; -} - -static inline void set_pp_bit(unsigned long pp, hpte_t *addr) -{ - unsigned long old; - unsigned long *p = &addr->r; - - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - rldimi %0,%2,0,61\n\ - stdcx. %0,0,%3\n\ - bne 1b" - : "=&r" (old), "=m" (*p) - : "r" (pp), "r" (p), "m" (*p) - : "cc"); -} - -/* - * Only works on small pages. Yes its ugly to have to check each slot in - * the group but we only use this during bootup. - */ -static long native_hpte_find(unsigned long vpn) -{ - hpte_t *hptep; - unsigned long hash; - unsigned long i, j; - long slot; - unsigned long hpte_v; - - hash = hpt_hash(vpn, 0); - - for (j = 0; j < 2; j++) { - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - for (i = 0; i < HPTES_PER_GROUP; i++) { - hptep = htab_address + slot; - hpte_v = hptep->v; - - if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) - && (hpte_v & HPTE_V_VALID) - && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { - /* HPTE matches */ - if (j) - slot = -slot; - return slot; - } - ++slot; - } - hash = ~hash; - } - - return -1; -} - -static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int large, int local) -{ - hpte_t *hptep = htab_address + slot; - unsigned long hpte_v; - unsigned long avpn = va >> 23; - int ret = 0; - - if (large) - avpn &= ~1; - - native_lock_hpte(hptep); - - hpte_v = hptep->v; - - /* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { - native_unlock_hpte(hptep); - ret = -1; - } else { - set_pp_bit(newpp, hptep); - native_unlock_hpte(hptep); - } - - /* Ensure it is out of the tlb too */ - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { - tlbiel(va); - } else { - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - spin_lock(&native_tlbie_lock); - tlbie(va, large); - if (lock_tlbie) - spin_unlock(&native_tlbie_lock); - } - - return ret; -} - -/* - * Update the page protection bits. Intended to be used to create - * guard pages for kernel data structures on pages which are bolted - * in the HPT. Assumes pages being operated on will not be stolen. 
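native_hpte_find() above probes at most sixteen slots: eight in the primary PTE group and eight in the secondary group, whose index comes from the inverted hash. The slot arithmetic on its own, as a small self-contained demo (the mask value is an example):

#include <stdio.h>

#define HPTES_PER_GROUP	8

/* First slot of the primary or secondary PTE group for a hash value;
 * mask is pteg_count - 1 (htab_hash_mask in the code above). */
static unsigned long pteg_slot(unsigned long hash, unsigned long mask,
			       int secondary)
{
	if (secondary)
		hash = ~hash;
	return (hash & mask) * HPTES_PER_GROUP;
}

int main(void)
{
	unsigned long mask = (1UL << 20) - 1;	/* e.g. 2^20 PTEGs */
	unsigned long hash = 0x123456789abcdefUL;

	printf("primary %#lx secondary %#lx\n",
	       pteg_slot(hash, mask, 0), pteg_slot(hash, mask, 1));
	return 0;
}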
- * Does not work on large pages. - * - * No need to lock here because we should be the only user. - */ -static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) -{ - unsigned long vsid, va, vpn, flags = 0; - long slot; - hpte_t *hptep; - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; - - slot = native_hpte_find(vpn); - if (slot == -1) - panic("could not find page to bolt\n"); - hptep = htab_address + slot; - - set_pp_bit(newpp, hptep); - - /* Ensure it is out of the tlb too */ - if (lock_tlbie) - spin_lock_irqsave(&native_tlbie_lock, flags); - tlbie(va, 0); - if (lock_tlbie) - spin_unlock_irqrestore(&native_tlbie_lock, flags); -} - -static void native_hpte_invalidate(unsigned long slot, unsigned long va, - int large, int local) -{ - hpte_t *hptep = htab_address + slot; - unsigned long hpte_v; - unsigned long avpn = va >> 23; - unsigned long flags; - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (large) - avpn &= ~1; - - local_irq_save(flags); - native_lock_hpte(hptep); - - hpte_v = hptep->v; - - /* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { - native_unlock_hpte(hptep); - } else { - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; - } - - /* Invalidate the tlb */ - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { - tlbiel(va); - } else { - if (lock_tlbie) - spin_lock(&native_tlbie_lock); - tlbie(va, large); - if (lock_tlbie) - spin_unlock(&native_tlbie_lock); - } - local_irq_restore(flags); -} - -/* - * clear all mappings on kexec. All cpus are in real mode (or they will - * be when they isi), and we are the only one left. We rely on our kernel - * mapping being 0xC0's and the hardware ignoring those two real bits. - * - * TODO: add batching support when enabled. remember, no dynamic memory here, - * athough there is the control page available... - */ -static void native_hpte_clear(void) -{ - unsigned long slot, slots, flags; - hpte_t *hptep = htab_address; - unsigned long hpte_v; - unsigned long pteg_count; - - pteg_count = htab_hash_mask + 1; - - local_irq_save(flags); - - /* we take the tlbie lock and hold it. Some hardware will - * deadlock if we try to tlbie from two processors at once. - */ - spin_lock(&native_tlbie_lock); - - slots = pteg_count * HPTES_PER_GROUP; - - for (slot = 0; slot < slots; slot++, hptep++) { - /* - * we could lock the pte here, but we are the only cpu - * running, right? and for crash dump, we probably - * don't want to wait for a maybe bad cpu. 
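The per-entry lock taken by native_lock_hpte() is a bit spinlock kept in bit 3 (HPTE_LOCK_BIT) of the HPTE's first doubleword. A self-contained analogue with GCC atomics, spinning on plain loads while contended exactly as the test_and_set_bit/test_bit pair does:

#include <stdint.h>

#define HPTE_LOCK_BIT	3

static void hpte_lock(uint64_t *word)
{
	uint64_t bit = 1UL << HPTE_LOCK_BIT;

	for (;;) {
		/* try to grab the bit; old value tells us if we got it */
		if (!(__atomic_fetch_or(word, bit, __ATOMIC_ACQUIRE) & bit))
			return;
		/* spin read-only so the cacheline isn't bounced */
		while (__atomic_load_n(word, __ATOMIC_RELAXED) & bit)
			;
	}
}

static void hpte_unlock(uint64_t *word)
{
	__atomic_fetch_and(word, ~(1UL << HPTE_LOCK_BIT), __ATOMIC_RELEASE);
}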
- */ - hpte_v = hptep->v; - - if (hpte_v & HPTE_V_VALID) { - hptep->v = 0; - tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); - } - } - - spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); -} - -static void native_flush_hash_range(unsigned long number, int local) -{ - unsigned long va, vpn, hash, secondary, slot, flags, avpn; - int i, j; - hpte_t *hptep; - unsigned long hpte_v; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long large = batch->large; - - local_irq_save(flags); - - j = 0; - for (i = 0; i < number; i++) { - va = batch->vaddr[j]; - if (large) - vpn = va >> HPAGE_SHIFT; - else - vpn = va >> PAGE_SHIFT; - hash = hpt_hash(vpn, large); - secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; - if (secondary) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; - - hptep = htab_address + slot; - - avpn = va >> 23; - if (large) - avpn &= ~0x1UL; - - native_lock_hpte(hptep); - - hpte_v = hptep->v; - - /* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { - native_unlock_hpte(hptep); - } else { - /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->v = 0; - } - - j++; - } - - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { - asm volatile("ptesync":::"memory"); - - for (i = 0; i < j; i++) - __tlbiel(batch->vaddr[i]); - - asm volatile("ptesync":::"memory"); - } else { - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - - for (i = 0; i < j; i++) - __tlbie(batch->vaddr[i], large); - - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - spin_unlock(&native_tlbie_lock); - } - - local_irq_restore(flags); -} - -#ifdef CONFIG_PPC_PSERIES -/* Disable TLB batching on nighthawk */ -static inline int tlb_batching_enabled(void) -{ - struct device_node *root = of_find_node_by_path("/"); - int enabled = 1; - - if (root) { - const char *model = get_property(root, "model", NULL); - if (model && !strcmp(model, "IBM,9076-N81")) - enabled = 0; - of_node_put(root); - } - - return enabled; -} -#else -static inline int tlb_batching_enabled(void) -{ - return 1; -} -#endif - -void hpte_init_native(void) -{ - ppc_md.hpte_invalidate = native_hpte_invalidate; - ppc_md.hpte_updatepp = native_hpte_updatepp; - ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; - ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; - ppc_md.hpte_clear_all = native_hpte_clear; - if (tlb_batching_enabled()) - ppc_md.flush_hash_range = native_flush_hash_range; - htab_finish_init(); -} diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c deleted file mode 100644 index 83507438d6a..00000000000 --- a/arch/ppc64/mm/hash_utils.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * PowerPC64 port by Mike Corrigan and Dave Engebretsen - * {mikejc|engebret}@us.ibm.com - * - * Copyright (c) 2000 Mike Corrigan - * - * SMP scalability work: - * Copyright (C) 2001 Anton Blanchard , IBM - * - * Module name: htab.c - * - * Description: - * PowerPC Hashed Page Table functions - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
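hpte_init_native() above is where the MMU backend gets selected: generic code calls through ppc_md function pointers, and boot code points them at either these native (direct hash-table) routines or hypervisor-call versions. Schematically, with only three of the hooks (the real ppc_md carries many more):

extern long native_hpte_insert(unsigned long group, unsigned long va,
			       unsigned long prpn, unsigned long vflags,
			       unsigned long rflags);
extern long native_hpte_remove(unsigned long group);
extern void native_hpte_invalidate(unsigned long slot, unsigned long va,
				   int large, int local);

struct hpte_ops {
	long (*insert)(unsigned long, unsigned long, unsigned long,
		       unsigned long, unsigned long);
	long (*remove)(unsigned long);
	void (*invalidate)(unsigned long, unsigned long, int, int);
};

static struct hpte_ops hpte_ops;

void hpte_backend_init(int native)
{
	if (native) {
		hpte_ops.insert = native_hpte_insert;
		hpte_ops.remove = native_hpte_remove;
		hpte_ops.invalidate = native_hpte_invalidate;
	}
	/* an LPAR kernel would install hcall-based versions instead */
}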
- */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* - * Note: pte --> Linux PTE - * HPTE --> PowerPC Hashed Page Table Entry - * - * Execution context: - * htab_initialize is called with the MMU off (of course), but - * the kernel has been copied down to zero so it can directly - * reference global data. At this point it is very difficult - * to print debug info. - * - */ - -#ifdef CONFIG_U3_DART -extern unsigned long dart_tablebase; -#endif /* CONFIG_U3_DART */ - -hpte_t *htab_address; -unsigned long htab_hash_mask; - -extern unsigned long _SDR1; - -#define KB (1024) -#define MB (1024*KB) - -static inline void loop_forever(void) -{ - volatile unsigned long x = 1; - for(;x;x|=1) - ; -} - -static inline void create_pte_mapping(unsigned long start, unsigned long end, - unsigned long mode, int large) -{ - unsigned long addr; - unsigned int step; - unsigned long tmp_mode; - unsigned long vflags; - - if (large) { - step = 16*MB; - vflags = HPTE_V_BOLTED | HPTE_V_LARGE; - } else { - step = 4*KB; - vflags = HPTE_V_BOLTED; - } - - for (addr = start; addr < end; addr += step) { - unsigned long vpn, hash, hpteg; - unsigned long vsid = get_kernel_vsid(addr); - unsigned long va = (vsid << 28) | (addr & 0xfffffff); - int ret = -1; - - if (large) - vpn = va >> HPAGE_SHIFT; - else - vpn = va >> PAGE_SHIFT; - - - tmp_mode = mode; - - /* Make non-kernel text non-executable */ - if (!in_kernel_text(addr)) - tmp_mode = mode | HW_NO_EXEC; - - hash = hpt_hash(vpn, large); - - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - -#ifdef CONFIG_PPC_ISERIES - if (systemcfg->platform & PLATFORM_ISERIES_LPAR) - ret = iSeries_hpte_bolt_or_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); - else -#endif -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_LPAR) - ret = pSeries_lpar_hpte_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); - else -#endif -#ifdef CONFIG_PPC_MULTIPLATFORM - ret = native_hpte_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); -#endif - - if (ret == -1) { - ppc64_terminate_msg(0x20, "create_pte_mapping"); - loop_forever(); - } - } -} - -void __init htab_initialize(void) -{ - unsigned long table, htab_size_bytes; - unsigned long pteg_count; - unsigned long mode_rw; - int i, use_largepages = 0; - unsigned long base = 0, size = 0; - extern unsigned long tce_alloc_start, tce_alloc_end; - - DBG(" -> htab_initialize()\n"); - - /* - * Calculate the required size of the htab. We want the number of - * PTEGs to equal one half the number of real pages. - */ - htab_size_bytes = 1UL << ppc64_pft_size; - pteg_count = htab_size_bytes >> 7; - - /* For debug, make the HTAB 1/8 as big as it normally would be. */ - ifppcdebug(PPCDBG_HTABSIZE) { - pteg_count >>= 3; - htab_size_bytes = pteg_count << 7; - } - - htab_hash_mask = pteg_count - 1; - - if (systemcfg->platform & PLATFORM_LPAR) { - /* Using a hypervisor which owns the htab */ - htab_address = NULL; - _SDR1 = 0; - } else { - /* Find storage for the HPT. Must be contiguous in - * the absolute address space. 
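The sizing arithmetic above is worth spelling out: one PTEG is eight 16-byte HPTEs, i.e. 128 bytes, so pteg_count = htab_size_bytes >> 7 and the hash mask is pteg_count - 1. A runnable check, using an example firmware value for ppc64_pft_size:

#include <stdio.h>

int main(void)
{
	unsigned int ppc64_pft_size = 24;	/* example: log2(16MB) */
	unsigned long htab_size_bytes = 1UL << ppc64_pft_size;
	unsigned long pteg_count = htab_size_bytes >> 7;
	unsigned long htab_hash_mask = pteg_count - 1;

	printf("htab %lu bytes, %lu PTEGs, mask %#lx\n",
	       htab_size_bytes, pteg_count, htab_hash_mask);
	return 0;	/* htab 16777216 bytes, 131072 PTEGs, mask 0x1ffff */
}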
- */ - table = lmb_alloc(htab_size_bytes, htab_size_bytes); - - DBG("Hash table allocated at %lx, size: %lx\n", table, - htab_size_bytes); - - if ( !table ) { - ppc64_terminate_msg(0x20, "hpt space"); - loop_forever(); - } - htab_address = abs_to_virt(table); - - /* htab absolute addr + encoded htabsize */ - _SDR1 = table + __ilog2(pteg_count) - 11; - - /* Initialize the HPT with no entries */ - memset((void *)table, 0, htab_size_bytes); - } - - mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX; - - /* On U3 based machines, we need to reserve the DART area and - * _NOT_ map it to avoid cache paradoxes as it's remapped non - * cacheable later on - */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - use_largepages = 1; - - /* create bolted the linear mapping in the hash table */ - for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].base + KERNELBASE; - size = lmb.memory.region[i].size; - - DBG("creating mapping for region: %lx : %lx\n", base, size); - -#ifdef CONFIG_U3_DART - /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two lmb regions and will - * fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if we - * only use 2Mb of that space. We will use more of it later for - * AGP GART. We have to use a full 16Mb large page. - */ - DBG("DART base: %lx\n", dart_tablebase); - - if (dart_tablebase != 0 && dart_tablebase >= base - && dart_tablebase < (base + size)) { - if (base != dart_tablebase) - create_pte_mapping(base, dart_tablebase, mode_rw, - use_largepages); - if ((base + size) > (dart_tablebase + 16*MB)) - create_pte_mapping(dart_tablebase + 16*MB, base + size, - mode_rw, use_largepages); - continue; - } -#endif /* CONFIG_U3_DART */ - create_pte_mapping(base, base + size, mode_rw, use_largepages); - } - - /* - * If we have a memory_limit and we've allocated TCEs then we need to - * explicitly map the TCE area at the top of RAM. We also cope with the - * case that the TCEs start below memory_limit. - * tce_alloc_start/end are 16MB aligned so the mapping should work - * for either 4K or 16MB pages. - */ - if (tce_alloc_start) { - tce_alloc_start += KERNELBASE; - tce_alloc_end += KERNELBASE; - - if (base + size >= tce_alloc_start) - tce_alloc_start = base + size + 1; - - create_pte_mapping(tce_alloc_start, tce_alloc_end, - mode_rw, use_largepages); - } - - DBG(" <- htab_initialize()\n"); -} -#undef KB -#undef MB - -/* - * Called by asm hashtable.S for doing lazy icache flush - */ -unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) -{ - struct page *page; - - if (!pfn_valid(pte_pfn(pte))) - return pp; - - page = pte_page(pte); - - /* page is dirty */ - if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { - __flush_dcache_icache(page_address(page)); - set_bit(PG_arch_1, &page->flags); - } else - pp |= HW_NO_EXEC; - } - return pp; -} - -/* Result code is: - * 0 - handled - * 1 - normal page fault - * -1 - critical hash insertion error - */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) -{ - void *pgdir; - unsigned long vsid; - struct mm_struct *mm; - pte_t *ptep; - int ret; - int user_region = 0; - int local = 0; - cpumask_t tmp; - - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) - return 1; - - switch (REGION_ID(ea)) { - case USER_REGION_ID: - user_region = 1; - mm = current->mm; - if (! 
mm) - return 1; - - vsid = get_vsid(mm->context.id, ea); - break; - case VMALLOC_REGION_ID: - mm = &init_mm; - vsid = get_kernel_vsid(ea); - break; -#if 0 - case KERNEL_REGION_ID: - /* - * Should never get here - entire 0xC0... region is bolted. - * Send the problem up to do_page_fault - */ -#endif - default: - /* Not a valid range - * Send the problem up to do_page_fault - */ - return 1; - break; - } - - pgdir = mm->pgd; - - if (pgdir == NULL) - return 1; - - tmp = cpumask_of_cpu(smp_processor_id()); - if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) - local = 1; - - /* Is this a huge page ? */ - if (unlikely(in_hugepage_area(mm->context, ea))) - ret = hash_huge_page(mm, access, ea, vsid, local); - else { - ptep = find_linux_pte(pgdir, ea); - if (ptep == NULL) - return 1; - ret = __hash_page(ea, access, vsid, ptep, trap, local); - } - - return ret; -} - -void flush_hash_page(unsigned long va, pte_t pte, int local) -{ - unsigned long vpn, hash, secondary, slot; - unsigned long huge = pte_huge(pte); - - if (huge) - vpn = va >> HPAGE_SHIFT; - else - vpn = va >> PAGE_SHIFT; - hash = hpt_hash(vpn, huge); - secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; - if (secondary) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; - - ppc_md.hpte_invalidate(slot, va, huge, local); -} - -void flush_hash_range(unsigned long number, int local) -{ - if (ppc_md.flush_hash_range) { - ppc_md.flush_hash_range(number, local); - } else { - int i; - struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); - - for (i = 0; i < number; i++) - flush_hash_page(batch->vaddr[i], batch->pte[i], local); - } -} - -static inline void make_bl(unsigned int *insn_addr, void *func) -{ - unsigned long funcp = *((unsigned long *)func); - int offset = funcp - (unsigned long)insn_addr; - - *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc)); - flush_icache_range((unsigned long)insn_addr, 4+ - (unsigned long)insn_addr); -} - -/* - * low_hash_fault is called when we the low level hash code failed - * to instert a PTE due to an hypervisor error - */ -void low_hash_fault(struct pt_regs *regs, unsigned long address) -{ - if (user_mode(regs)) { - siginfo_t info; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return; - } - bad_page_fault(regs, address, SIGBUS); -} - -void __init htab_finish_init(void) -{ - extern unsigned int *htab_call_hpte_insert1; - extern unsigned int *htab_call_hpte_insert2; - extern unsigned int *htab_call_hpte_remove; - extern unsigned int *htab_call_hpte_updatepp; - - make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); - make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); - make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); - make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp); -} diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c deleted file mode 100644 index 0ea0994ed97..00000000000 --- a/arch/ppc64/mm/hugetlbpage.c +++ /dev/null @@ -1,745 +0,0 @@ -/* - * PPC64 (POWER4) Huge TLB Page Support for Kernel. - * - * Copyright (C) 2003 David Gibson, IBM Corporation. 
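A note on make_bl() a few functions up: it patches the htab_call_* sites with a direct branch-and-link. On ppc64 a C function pointer points at a descriptor whose first doubleword is the entry address, hence the *((unsigned long *)func) dereference; the instruction built is an I-form branch, opcode 18, LK=1, with a 26-bit word-aligned displacement. The encoding in isolation:

#include <stdio.h>
#include <stdint.h>

static uint32_t encode_bl(uint64_t insn_addr, uint64_t target)
{
	int64_t offset = (int64_t)(target - insn_addr);

	/* offset must be word aligned and fit in 26 signed bits */
	return 0x48000001u | ((uint32_t)offset & 0x03fffffcu);
}

int main(void)
{
	/* bl from 0x1000 to 0x2000: displacement 0x1000 */
	printf("0x%08x\n", (unsigned)encode_bl(0x1000, 0x2000));
	return 0;	/* prints 0x48001001 */
}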
- * - * Based on the IA-32 version: - * Copyright (C) 2002, Rohit Seth - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) -#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) - -/* Modelled after find_linux_pte() */ -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *pt; - - BUG_ON(! in_hugepage_area(mm->context, addr)); - - addr &= HPAGE_MASK; - - pg = pgd_offset(mm, addr); - if (!pgd_none(*pg)) { - pu = pud_offset(pg, addr); - if (!pud_none(*pu)) { - pm = pmd_offset(pu, addr); - pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); - return pt; - } - } - - return NULL; -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *pt; - - BUG_ON(! in_hugepage_area(mm->context, addr)); - - addr &= HPAGE_MASK; - - pg = pgd_offset(mm, addr); - pu = pud_alloc(mm, pg, addr); - - if (pu) { - pm = pmd_alloc(mm, pu, addr); - if (pm) { - pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); - return pt; - } - } - - return NULL; -} - -#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) - -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - int i; - - if (pte_present(*ptep)) { - pte_clear(mm, addr, ptep); - flush_tlb_pending(); - } - - for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { - *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - ptep++; - } -} - -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - unsigned long old = pte_update(ptep, ~0UL); - int i; - - if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); - - for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) - ptep[i] = __pte(0); - - return __pte(old); -} - -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - if (! (within_hugepage_low_range(addr, len) - || within_hugepage_high_range(addr, len)) ) - return -EINVAL; - return 0; -} - -static void flush_low_segments(void *parm) -{ - u16 areas = (unsigned long) parm; - unsigned long i; - - asm volatile("isync" : : : "memory"); - - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); - - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) - continue; - asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); - } - - asm volatile("isync" : : : "memory"); -} - -static void flush_high_segments(void *parm) -{ - u16 areas = (unsigned long) parm; - unsigned long i, j; - - asm volatile("isync" : : : "memory"); - - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); - - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! 
(areas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); - } - - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_LOW_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) - return 0; /* The segments we want are already open */ - - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.low_htlb_areas |= newareas; - - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); - - return 0; -} - -int prepare_hugepage_range(unsigned long addr, unsigned long len) -{ - int err; - - if ( (addr+len) < addr ) - return -EINVAL; - - if ((addr + len) < 0x100000000UL) - err = open_low_hpage_areas(current->mm, - LOW_ESID_MASK(addr, len)); - else - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); - return err; - } - - return 0; -} - -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - pte_t *ptep; - struct page *page; - - if (! 
in_hugepage_area(mm->context, address))
-		return ERR_PTR(-EINVAL);
-
-	ptep = huge_pte_offset(mm, address);
-	page = pte_page(*ptep);
-	if (page)
-		page += (address % HPAGE_SIZE) / PAGE_SIZE;
-
-	return page;
-}
-
-int pmd_huge(pmd_t pmd)
-{
-	return 0;
-}
-
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	BUG();
-	return NULL;
-}
-
-/* Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions. */
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-				     unsigned long len, unsigned long pgoff,
-				     unsigned long flags)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long start_addr;
-
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	if (addr) {
-		addr = PAGE_ALIGN(addr);
-		vma = find_vma(mm, addr);
-		if (((TASK_SIZE - len) >= addr)
-		    && (!vma || (addr+len) <= vma->vm_start)
-		    && !is_hugepage_only_range(mm, addr,len))
-			return addr;
-	}
-	if (len > mm->cached_hole_size) {
-		start_addr = addr = mm->free_area_cache;
-	} else {
-		start_addr = addr = TASK_UNMAPPED_BASE;
-		mm->cached_hole_size = 0;
-	}
-
-full_search:
-	vma = find_vma(mm, addr);
-	while (TASK_SIZE - len >= addr) {
-		BUG_ON(vma && (addr >= vma->vm_end));
-
-		if (touches_hugepage_low_range(mm, addr, len)) {
-			addr = ALIGN(addr+1, 1<<SID_SHIFT);
-			vma = find_vma(mm, addr);
-			continue;
-		}
-		if (touches_hugepage_high_range(mm, addr, len)) {
-			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
-			vma = find_vma(mm, addr);
-			continue;
-		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-			mm->cached_hole_size = vma->vm_start - addr;
-		addr = vma->vm_end;
-		vma = vma->vm_next;
-	}
-
-	/* Make sure we didn't miss any holes */
-	if (start_addr != TASK_UNMAPPED_BASE) {
-		start_addr = addr = TASK_UNMAPPED_BASE;
-		mm->cached_hole_size = 0;
-		goto full_search;
-	}
-	return -ENOMEM;
-}
-
-/*
- * This mmap-allocator allocates new areas top-down from below the
- * stack's low limit (the base):
- *
- * Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions.
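The low areas tracked by these bitmaps are 256MB segments below 4GB (SID_SHIFT), the high areas larger power-of-two slices of the 64-bit space (HTLB_AREA_SHIFT). One plausible reading of what LOW_ESID_MASK(addr, len) computes, as self-contained C (an illustration, not the kernel macro):

#include <stdio.h>
#include <stdint.h>

#define SID_SHIFT	28	/* 256MB segments */

static uint16_t low_area_mask(unsigned long addr, unsigned long len)
{
	unsigned long first = addr >> SID_SHIFT;
	unsigned long last = (addr + len - 1) >> SID_SHIFT;
	uint16_t mask = 0;

	for (unsigned long i = first; i <= last && i < 16; i++)
		mask |= (uint16_t)(1u << i);
	return mask;
}

int main(void)
{
	/* a 512MB range starting at 768MB touches segments 3 and 4 */
	printf("%#x\n", (unsigned)low_area_mask(0x30000000UL, 0x20000000UL));
	return 0;	/* prints 0x18 */
}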
- */ -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - /* dont allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & PAGE_MASK; - do { -hugepage_recheck: - if (touches_hugepage_low_range(mm, addr, len)) { - addr = (addr & ((~0) << SID_SHIFT)) - len; - goto hugepage_recheck; - } else if (touches_hugepage_high_range(mm, addr, len)) { - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; - goto hugepage_recheck; - } - - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) -{ - unsigned long addr = 0; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= 0x100000000UL) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! 
__within_hugepage_low_range(addr, len, segmask)) {
-			addr = ALIGN(addr+1, 1<<SID_SHIFT);
-			vma = find_vma(current->mm, addr);
-			continue;
-		}
-
-		if (!vma || (addr + len) <= vma->vm_start)
-			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Depending on segmask this might not be a confirmed
-		 * hugepage region, so the ALIGN could have skipped
-		 * some VMAs */
-		vma = find_vma(current->mm, addr);
-	}
-
-	return -ENOMEM;
-}
-
-static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
-{
-	unsigned long addr = 0x100000000UL;
-	struct vm_area_struct *vma;
-
-	vma = find_vma(current->mm, addr);
-	while (addr + len <= TASK_SIZE_USER64) {
-		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
-		if (! __within_hugepage_high_range(addr, len, areamask)) {
-			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
-			vma = find_vma(current->mm, addr);
-			continue;
-		}
-
-		if (!vma || (addr + len) <= vma->vm_start)
-			return addr;
-		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Depending on segmask this might not be a confirmed
-		 * hugepage region, so the ALIGN could have skipped
-		 * some VMAs */
-		vma = find_vma(current->mm, addr);
-	}
-
-	return -ENOMEM;
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags)
-{
-	int lastshift;
-	u16 areamask, curareas;
-
-	if (len & ~HPAGE_MASK)
-		return -EINVAL;
-
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
-		return -EINVAL;
-
-	if (test_thread_flag(TIF_32BIT)) {
-		curareas = current->mm->context.low_htlb_areas;
-
-		/* First see if we can do the mapping in the existing
-		 * low areas */
-		addr = htlb_get_low_area(len, curareas);
-		if (addr != -ENOMEM)
-			return addr;
-
-		lastshift = 0;
-		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
-		     ! lastshift; areamask >>=1) {
-			if (areamask & 1)
-				lastshift = 1;
-
-			addr = htlb_get_low_area(len, curareas | areamask);
-			if ((addr != -ENOMEM)
-			    && open_low_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-	} else {
-		curareas = current->mm->context.high_htlb_areas;
-
-		/* First see if we can do the mapping in the existing
-		 * high areas */
-		addr = htlb_get_high_area(len, curareas);
-		if (addr != -ENOMEM)
-			return addr;
-
-		lastshift = 0;
-		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
-		     ! lastshift; areamask >>=1) {
-			if (areamask & 1)
-				lastshift = 1;
-
-			addr = htlb_get_high_area(len, curareas | areamask);
-			if ((addr != -ENOMEM)
-			    && open_high_hpage_areas(current->mm, areamask) == 0)
-				return addr;
-		}
-	}
-	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-	       " enough areas\n");
-	return -ENOMEM;
-}
-
-int hash_huge_page(struct mm_struct *mm, unsigned long access,
-		   unsigned long ea, unsigned long vsid, int local)
-{
-	pte_t *ptep;
-	unsigned long va, vpn;
-	pte_t old_pte, new_pte;
-	unsigned long rflags, prpn;
-	long slot;
-	int err = 1;
-
-	spin_lock(&mm->page_table_lock);
-
-	ptep = huge_pte_offset(mm, ea);
-
-	/* Search the Linux page table for a match with va */
-	va = (vsid << 28) | (ea & 0x0fffffff);
-	vpn = va >> HPAGE_SHIFT;
-
-	/*
-	 * If no pte found or not present, send the problem up to
-	 * do_page_fault
-	 */
-	if (unlikely(!ptep || pte_none(*ptep)))
-		goto out;
-
-/*	BUG_ON(pte_bad(*ptep)); */
-
-	/*
-	 * Check the user's access rights to the page.  If access should be
-	 * prevented then send the problem up to do_page_fault.
-	 */
-	if (unlikely(access & ~pte_val(*ptep)))
-		goto out;
-	/*
-	 * At this point, we have a pte (old_pte) which can be used to build
-	 * or update an HPTE. There are 2 cases:
-	 *
-	 * 1.
There is a valid (present) pte with no associated HPTE (this is - * the most common case) - * 2. There is a valid (present) pte with an associated HPTE. The - * current values of the pp bits in the HPTE prevent access - * because we are doing software DIRTY bit management and the - * page is currently not DIRTY. - */ - - - old_pte = *ptep; - new_pte = old_pte; - - rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); - /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ - rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); - - /* Check if pte already has an hpte (case 2) */ - if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { - /* There MIGHT be an HPTE for this pte */ - unsigned long hash, slot; - - hash = hpt_hash(vpn, 1); - if (pte_val(old_pte) & _PAGE_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; - - if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) - pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; - } - - if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(vpn, 1); - unsigned long hpte_group; - - prpn = pte_pfn(old_pte); - -repeat: - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; - - /* Update the linux pte with the HPTE slot */ - pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; - pte_val(new_pte) |= _PAGE_HASHPTE; - - /* Add in WIMG bits */ - /* XXX We should store these in the pte */ - rflags |= _PAGE_COHERENT; - - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE, rflags); - - /* Primary is full, try the secondary */ - if (unlikely(slot == -1)) { - pte_val(new_pte) |= _PAGE_SECONDARY; - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE | - HPTE_V_SECONDARY, - rflags); - if (slot == -1) { - if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP)&~0x7UL; - - ppc_md.hpte_remove(hpte_group); - goto repeat; - } - } - - if (unlikely(slot == -2)) - panic("hash_huge_page: pte_insert failed\n"); - - pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; - - /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock - */ - *ptep = new_pte; - } - - err = 0; - - out: - spin_unlock(&mm->page_table_lock); - - return err; -} diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c deleted file mode 100644 index c65b87b9275..00000000000 --- a/arch/ppc64/mm/imalloc.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
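The insertion discipline in hash_huge_page() above (and in __hash_page in hash_low.S) is: try the primary group, then the secondary, and if both are full evict a pseudo-random entry and retry, with the timebase supplying the randomness. Reduced to a sketch with hypothetical hpte_insert/hpte_remove/timebase stand-ins (the real code also treats a -2 return as a fatal error):

#define HPTES_PER_GROUP	8

extern long hpte_insert(unsigned long group, int secondary);
extern long hpte_remove(unsigned long group);
extern unsigned long timebase(void);

long insert_with_evict(unsigned long hash, unsigned long mask)
{
	for (;;) {
		unsigned long grp = (hash & mask) * HPTES_PER_GROUP;
		long slot = hpte_insert(grp, 0);

		if (slot >= 0)
			return slot;	/* room in the primary group */

		grp = (~hash & mask) * HPTES_PER_GROUP;
		slot = hpte_insert(grp, 1);
		if (slot >= 0)
			return slot;	/* room in the secondary group */

		/* both full: evict from one group, chosen by the
		 * timebase low bit, then try the whole dance again */
		grp = ((timebase() & 1 ? hash : ~hash) & mask)
			* HPTES_PER_GROUP;
		hpte_remove(grp);
	}
}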
- */ - -#include -#include - -#include -#include -#include -#include -#include -#include - -static DECLARE_MUTEX(imlist_sem); -struct vm_struct * imlist = NULL; - -static int get_free_im_addr(unsigned long size, unsigned long *im_addr) -{ - unsigned long addr; - struct vm_struct **p, *tmp; - - addr = ioremap_bot; - for (p = &imlist; (tmp = *p) ; p = &tmp->next) { - if (size + addr < (unsigned long) tmp->addr) - break; - if ((unsigned long)tmp->addr >= ioremap_bot) - addr = tmp->size + (unsigned long) tmp->addr; - if (addr >= IMALLOC_END-size) - return 1; - } - *im_addr = addr; - - return 0; -} - -/* Return whether the region described by v_addr and size is a subset - * of the region described by parent - */ -static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, - struct vm_struct *parent) -{ - return (int) (v_addr >= (unsigned long) parent->addr && - v_addr < (unsigned long) parent->addr + parent->size && - size < parent->size); -} - -/* Return whether the region described by v_addr and size is a superset - * of the region described by child - */ -static int im_region_is_superset(unsigned long v_addr, unsigned long size, - struct vm_struct *child) -{ - struct vm_struct parent; - - parent.addr = (void *) v_addr; - parent.size = size; - - return im_region_is_subset((unsigned long) child->addr, child->size, - &parent); -} - -/* Return whether the region described by v_addr and size overlaps - * the region described by vm. Overlapping regions meet the - * following conditions: - * 1) The regions share some part of the address space - * 2) The regions aren't identical - * 3) Neither region is a subset of the other - */ -static int im_region_overlaps(unsigned long v_addr, unsigned long size, - struct vm_struct *vm) -{ - if (im_region_is_superset(v_addr, size, vm)) - return 0; - - return (v_addr + size > (unsigned long) vm->addr + vm->size && - v_addr < (unsigned long) vm->addr + vm->size) || - (v_addr < (unsigned long) vm->addr && - v_addr + size > (unsigned long) vm->addr); -} - -/* Determine imalloc status of region described by v_addr and size. - * Can return one of the following: - * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. - * IM_REGION_SUBSET - Region is a subset of a region that is already - * allocated in imalloc space. - * vm will be assigned to a ptr to the parent region. - * IM_REGION_EXISTS - Exact region already allocated in imalloc space. - * vm will be assigned to a ptr to the existing imlist - * member. - * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. - * IM_REGION_SUPERSET - Region is a superset of a region that is already - * allocated in imalloc space. 
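The im_region_is_subset/superset/overlaps tests above are interval relations over [addr, addr+size); collected into one self-contained classifier they read:

#include <stdio.h>

enum im_rel { IM_UNUSED, IM_SUBSET, IM_EXISTS, IM_OVERLAP, IM_SUPERSET };

static enum im_rel region_rel(unsigned long a, unsigned long asz,
			      unsigned long b, unsigned long bsz)
{
	unsigned long aend = a + asz, bend = b + bsz;

	if (aend <= b || bend <= a)
		return IM_UNUSED;	/* disjoint */
	if (a == b && asz == bsz)
		return IM_EXISTS;	/* identical */
	if (a >= b && aend <= bend)
		return IM_SUBSET;	/* a inside b */
	if (b >= a && bend <= aend)
		return IM_SUPERSET;	/* b inside a */
	return IM_OVERLAP;		/* partial overlap */
}

int main(void)
{
	/* [0x1000,0x2000) vs [0x1800,0x2800): partial overlap */
	printf("%d\n", region_rel(0x1000, 0x1000, 0x1800, 0x1000));
	return 0;	/* prints 3 (IM_OVERLAP) */
}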
- */ -static int im_region_status(unsigned long v_addr, unsigned long size, - struct vm_struct **vm) -{ - struct vm_struct *tmp; - - for (tmp = imlist; tmp; tmp = tmp->next) - if (v_addr < (unsigned long) tmp->addr + tmp->size) - break; - - if (tmp) { - if (im_region_overlaps(v_addr, size, tmp)) - return IM_REGION_OVERLAP; - - *vm = tmp; - if (im_region_is_subset(v_addr, size, tmp)) { - /* Return with tmp pointing to superset */ - return IM_REGION_SUBSET; - } - if (im_region_is_superset(v_addr, size, tmp)) { - /* Return with tmp pointing to first subset */ - return IM_REGION_SUPERSET; - } - else if (v_addr == (unsigned long) tmp->addr && - size == tmp->size) { - /* Return with tmp pointing to exact region */ - return IM_REGION_EXISTS; - } - } - - *vm = NULL; - return IM_REGION_UNUSED; -} - -static struct vm_struct * split_im_region(unsigned long v_addr, - unsigned long size, struct vm_struct *parent) -{ - struct vm_struct *vm1 = NULL; - struct vm_struct *vm2 = NULL; - struct vm_struct *new_vm = NULL; - - vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL); - if (vm1 == NULL) { - printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); - return NULL; - } - - if (v_addr == (unsigned long) parent->addr) { - /* Use existing parent vm_struct to represent child, allocate - * new one for the remainder of parent range - */ - vm1->size = parent->size - size; - vm1->addr = (void *) (v_addr + size); - vm1->next = parent->next; - - parent->size = size; - parent->next = vm1; - new_vm = parent; - } else if (v_addr + size == (unsigned long) parent->addr + - parent->size) { - /* Allocate new vm_struct to represent child, use existing - * parent one for remainder of parent range - */ - vm1->size = size; - vm1->addr = (void *) v_addr; - vm1->next = parent->next; - new_vm = vm1; - - parent->size -= size; - parent->next = vm1; - } else { - /* Allocate two new vm_structs for the new child and - * uppermost remainder, and use existing parent one for the - * lower remainder of parent range - */ - vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL); - if (vm2 == NULL) { - printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); - kfree(vm1); - return NULL; - } - - vm1->size = size; - vm1->addr = (void *) v_addr; - vm1->next = vm2; - new_vm = vm1; - - vm2->size = ((unsigned long) parent->addr + parent->size) - - (v_addr + size); - vm2->addr = (void *) v_addr + size; - vm2->next = parent->next; - - parent->size = v_addr - (unsigned long) parent->addr; - parent->next = vm1; - } - - return new_vm; -} - -static struct vm_struct * __add_new_im_area(unsigned long req_addr, - unsigned long size) -{ - struct vm_struct **p, *tmp, *area; - - for (p = &imlist; (tmp = *p) ; p = &tmp->next) { - if (req_addr + size <= (unsigned long)tmp->addr) - break; - } - - area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL); - if (!area) - return NULL; - area->flags = 0; - area->addr = (void *)req_addr; - area->size = size; - area->next = *p; - *p = area; - - return area; -} - -static struct vm_struct * __im_get_area(unsigned long req_addr, - unsigned long size, - int criteria) -{ - struct vm_struct *tmp; - int status; - - status = im_region_status(req_addr, size, &tmp); - if ((criteria & status) == 0) { - return NULL; - } - - switch (status) { - case IM_REGION_UNUSED: - tmp = __add_new_im_area(req_addr, size); - break; - case IM_REGION_SUBSET: - tmp = split_im_region(req_addr, size, tmp); - break; - case IM_REGION_EXISTS: - /* Return requested region */ - break; - case IM_REGION_SUPERSET: - /* Return first existing 
subset of requested region */ - break; - default: - printk(KERN_ERR "%s() unexpected imalloc region status\n", - __FUNCTION__); - tmp = NULL; - } - - return tmp; -} - -struct vm_struct * im_get_free_area(unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - - down(&imlist_sem); - if (get_free_im_addr(size, &addr)) { - printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", - __FUNCTION__, size); - area = NULL; - goto next_im_done; - } - - area = __im_get_area(addr, size, IM_REGION_UNUSED); - if (area == NULL) { - printk(KERN_ERR - "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", - __FUNCTION__, addr, size); - } -next_im_done: - up(&imlist_sem); - return area; -} - -struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, - int criteria) -{ - struct vm_struct *area; - - down(&imlist_sem); - area = __im_get_area(v_addr, size, criteria); - up(&imlist_sem); - return area; -} - -void im_free(void * addr) -{ - struct vm_struct **p, *tmp; - - if (!addr) - return; - if ((unsigned long) addr & ~PAGE_MASK) { - printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); - return; - } - down(&imlist_sem); - for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { - if (tmp->addr == addr) { - *p = tmp->next; - - /* XXX: do we need the lock? */ - spin_lock(&init_mm.page_table_lock); - unmap_vm_area(tmp); - spin_unlock(&init_mm.page_table_lock); - - kfree(tmp); - up(&imlist_sem); - return; - } - } - up(&imlist_sem); - printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, - addr); -} diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c deleted file mode 100644 index c2157c9c3ac..00000000000 --- a/arch/ppc64/mm/init.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. 
-#endif - -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -unsigned long _SDR1=0; -unsigned long _ASR=0; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -void show_mem(void) -{ - unsigned long total = 0, reserved = 0; - unsigned long shared = 0, cached = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat_page_nr(pgdat, i); - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - } - printk("%ld pages of RAM\n", total); - printk("%ld reserved pages\n", reserved); - printk("%ld pages shared\n", shared); - printk("%ld pages swap cached\n", cached); -} - -#ifdef CONFIG_PPC_ISERIES - -void __iomem *ioremap(unsigned long addr, unsigned long size) -{ - return (void __iomem *)addr; -} - -extern void __iomem *__ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - return (void __iomem *)addr; -} - -void iounmap(volatile void __iomem *addr) -{ - return; -} - -#else - -/* - * map_io_page currently only called by __ioremap - * map_io_page adds an entry to the ioremap page table - * and adds an entry to the HPT, possibly bolting it - */ -static int map_io_page(unsigned long ea, unsigned long pa, int flags) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long vsid; - - if (mem_init_done) { - spin_lock(&init_mm.page_table_lock); - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - ptep = pte_alloc_kernel(&init_mm, pmdp, ea); - if (!ptep) - return -ENOMEM; - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); - spin_unlock(&init_mm.page_table_lock); - } else { - unsigned long va, vpn, hash, hpteg; - - /* - * If the mm subsystem is not fully up, we cannot create a - * linux page table entry for this mapping. Simply bolt an - * entry in the hardware page table. 
- */ - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0xFFFFFFF); - vpn = va >> PAGE_SHIFT; - - hash = hpt_hash(vpn, 0); - - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - - /* Panic if a pte grpup is full */ - if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, - HPTE_V_BOLTED, - _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) - == -1) { - panic("map_io_page: could not insert mapping"); - } - } - return 0; -} - - -static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, - unsigned long ea, unsigned long size, - unsigned long flags) -{ - unsigned long i; - - if ((flags & _PAGE_PRESENT) == 0) - flags |= pgprot_val(PAGE_KERNEL); - - for (i = 0; i < size; i += PAGE_SIZE) - if (map_io_page(ea+i, pa+i, flags)) - return NULL; - - return (void __iomem *) (ea + (addr & ~PAGE_MASK)); -} - - -void __iomem * -ioremap(unsigned long addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); -} - -void __iomem * __ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - unsigned long pa, ea; - void __iomem *ret; - - /* - * Choose an address to map it to. - * Once the imalloc system is running, we use it. - * Before that, we map using addresses going - * up from ioremap_bot. imalloc will use - * the addresses from ioremap_bot through - * IMALLOC_END - * - */ - pa = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - pa; - - if (size == 0) - return NULL; - - if (mem_init_done) { - struct vm_struct *area; - area = im_get_free_area(size); - if (area == NULL) - return NULL; - ea = (unsigned long)(area->addr); - ret = __ioremap_com(addr, pa, ea, size, flags); - if (!ret) - im_free(area->addr); - } else { - ea = ioremap_bot; - ret = __ioremap_com(addr, pa, ea, size, flags); - if (ret) - ioremap_bot += size; - } - return ret; -} - -#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) - -int __ioremap_explicit(unsigned long pa, unsigned long ea, - unsigned long size, unsigned long flags) -{ - struct vm_struct *area; - void __iomem *ret; - - /* For now, require page-aligned values for pa, ea, and size */ - if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || - !IS_PAGE_ALIGNED(size)) { - printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); - return 1; - } - - if (!mem_init_done) { - /* Two things to consider in this case: - * 1) No records will be kept (imalloc, etc) that the region - * has been remapped - * 2) It won't be easy to iounmap() the region later (because - * of 1) - */ - ; - } else { - area = im_get_area(ea, size, - IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); - if (area == NULL) { - /* Expected when PHB-dlpar is in play */ - return 1; - } - if (ea != (unsigned long) area->addr) { - printk(KERN_ERR "unexpected addr return from " - "im_get_area\n"); - return 1; - } - } - - ret = __ioremap_com(pa, pa, ea, size, flags); - if (ret == NULL) { - printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); - return 1; - } - if (ret != (void *) ea) { - printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); - return 1; - } - - return 0; -} - -/* - * Unmap an IO region and remove it from imalloc'd list. - * Access to IO memory should be serialized by driver. 
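Before mem_init_done, __ioremap() above hands out virtual addresses by simply bumping the ioremap_bot watermark (which starts at IMALLOC_BASE); once the allocator is up, imalloc takes over the range up to IMALLOC_END. The early path is just a bump allocator:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* arbitrary example base; the real start is IMALLOC_BASE */
static unsigned long ioremap_bot = 0xd000000000000000UL;

static unsigned long early_iomap_alloc(unsigned long size)
{
	unsigned long ea = ioremap_bot;

	ioremap_bot += (size + PAGE_SIZE - 1) & PAGE_MASK;
	return ea;
}

int main(void)
{
	printf("%#lx\n", early_iomap_alloc(3 * PAGE_SIZE + 5));
	printf("next: %#lx\n", ioremap_bot);	/* bumped by 4 pages */
	return 0;
}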
- * This code is modeled after vmalloc code - unmap_vm_area() - * - * XXX what about calls before mem_init_done (ie python_countermeasures()) - */ -void iounmap(volatile void __iomem *token) -{ - void *addr; - - if (!mem_init_done) - return; - - addr = (void *) ((unsigned long __force) token & PAGE_MASK); - - im_free(addr); -} - -static int iounmap_subset_regions(unsigned long addr, unsigned long size) -{ - struct vm_struct *area; - - /* Check whether subsets of this region exist */ - area = im_get_area(addr, size, IM_REGION_SUPERSET); - if (area == NULL) - return 1; - - while (area) { - iounmap((void __iomem *) area->addr); - area = im_get_area(addr, size, - IM_REGION_SUPERSET); - } - - return 0; -} - -int iounmap_explicit(volatile void __iomem *start, unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - int rc; - - addr = (unsigned long __force) start & PAGE_MASK; - - /* Verify that the region either exists or is a subset of an existing - * region. In the latter case, split the parent region to create - * the exact region - */ - area = im_get_area(addr, size, - IM_REGION_EXISTS | IM_REGION_SUBSET); - if (area == NULL) { - /* Determine whether subset regions exist. If so, unmap */ - rc = iounmap_subset_regions(addr, size); - if (rc) { - printk(KERN_ERR - "%s() cannot unmap nonexistent range 0x%lx\n", - __FUNCTION__, addr); - return 1; - } - } else { - iounmap((void __iomem *) area->addr); - } - /* - * FIXME! This can't be right: - iounmap(area->addr); - * Maybe it should be "iounmap(area);" - */ - return 0; -} - -#endif - -EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, 0xcc, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk ("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -static DEFINE_SPINLOCK(mmu_context_lock); -static DEFINE_IDR(mmu_context_idr); - -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int index; - int err; - -again: - if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&mmu_context_lock); - err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); - spin_unlock(&mmu_context_lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_CONTEXT) { - idr_remove(&mmu_context_idr, index); - return -ENOMEM; - } - - mm->context.id = index; - - return 0; -} - -void destroy_context(struct mm_struct *mm) -{ - spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, mm->context.id); - spin_unlock(&mmu_context_lock); - - mm->context.id = NO_CONTEXT; -} - -/* - * Do very early mm setup. - */ -void __init mm_init_ppc64(void) -{ -#ifndef CONFIG_PPC_ISERIES - unsigned long i; -#endif - - ppc64_boot_msg(0x100, "MM Init"); - - /* This is the story of the IO hole... please, keep seated, - * unfortunately, we are out of oxygen masks at the moment. 
- * So we need some rough way to tell where your big IO hole
- * is. On pmac, it's between 2G and 4G, on POWER3, it's around
- * that area as well, on POWER4 we don't have one, etc...
- * We need that as a "hint" when sizing the TCE table on POWER3
- * So far, the simplest way that seems to work well enough for us
- * is to just assume that the first discontinuity in our physical
- * RAM layout is the IO hole. That may not be correct in the future
- * (and isn't on iSeries but then we don't care ;)
- */
-
-#ifndef CONFIG_PPC_ISERIES
-	for (i = 1; i < lmb.memory.cnt; i++) {
-		unsigned long base, prevbase, prevsize;
-
-		prevbase = lmb.memory.region[i-1].base;
-		prevsize = lmb.memory.region[i-1].size;
-		base = lmb.memory.region[i].base;
-		if (base > (prevbase + prevsize)) {
-			io_hole_start = prevbase + prevsize;
-			io_hole_size = base - (prevbase + prevsize);
-			break;
-		}
-	}
-#endif /* CONFIG_PPC_ISERIES */
-	if (io_hole_start)
-		printk("IO Hole assumed to be %lx -> %lx\n",
-		       io_hole_start, io_hole_start + io_hole_size - 1);
-
-	ppc64_boot_msg(0x100, "MM Init Done");
-}
-
-/*
- * This is called by /dev/mem to know if a given address has to
- * be mapped non-cacheable or not
- */
-int page_is_ram(unsigned long pfn)
-{
-	int i;
-	unsigned long paddr = (pfn << PAGE_SHIFT);
-
-	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long base;
-
-		base = lmb.memory.region[i].base;
-
-		if ((paddr >= base) &&
-			(paddr < (base + lmb.memory.region[i].size))) {
-			return 1;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(page_is_ram);
-
-/*
- * Initialize the bootmem system and give it all the memory we
- * have available.
- */
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init do_init_bootmem(void)
-{
-	unsigned long i;
-	unsigned long start, bootmap_pages;
-	unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
-	int boot_mapsize;
-
-	/*
-	 * Find an area to use for the bootmem bitmap. Calculate the size of
-	 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
-	 * Add 1 additional page in case the address isn't page-aligned.
-	 */
-	bootmap_pages = bootmem_bootmap_pages(total_pages);
-
-	start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-	BUG_ON(!start);
-
-	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
-
-	max_pfn = max_low_pfn;
-
-	/* Add all physical memory to the bootmem map, mark each area
-	 * present.
-	 */
-	for (i=0; i < lmb.memory.cnt; i++)
-		free_bootmem(lmb.memory.region[i].base,
-			     lmb_size_bytes(&lmb.memory, i));
-
-	/* reserve the sections we're already using */
-	for (i=0; i < lmb.reserved.cnt; i++)
-		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i));
-
-	for (i=0; i < lmb.memory.cnt; i++)
-		memory_present(0, lmb_start_pfn(&lmb.memory, i),
-			       lmb_end_pfn(&lmb.memory, i));
-}
-
-/*
- * paging_init() sets up the page tables - in fact we've already done this.
- */
-void __init paging_init(void)
-{
-	unsigned long zones_size[MAX_NR_ZONES];
-	unsigned long zholes_size[MAX_NR_ZONES];
-	unsigned long total_ram = lmb_phys_mem_size();
-	unsigned long top_of_ram = lmb_end_of_DRAM();
-
-	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
-	       top_of_ram, total_ram);
-	printk(KERN_INFO "Memory hole size: %ldMB\n",
-	       (top_of_ram - total_ram) >> 20);
-	/*
-	 * All pages are DMA-able so we put them all in the DMA zone.
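The heuristic in mm_init_ppc64(), treating the first discontinuity in the sorted physical memory list as the IO hole, is a single forward scan. Modelled over a plain array (illustrative region values, not real hardware layouts):

#include <stdio.h>

struct region { unsigned long base, size; };

int main(void)
{
	/* sorted, non-overlapping regions; the gap between them models the IO hole */
	struct region mem[] = { { 0x0, 0x80000000UL }, { 0x100000000UL, 0x80000000UL } };
	int cnt = 2;
	unsigned long hole_start = 0, hole_size = 0;

	for (int i = 1; i < cnt; i++) {
		unsigned long prev_end = mem[i-1].base + mem[i-1].size;
		if (mem[i].base > prev_end) {	/* first discontinuity wins */
			hole_start = prev_end;
			hole_size = mem[i].base - prev_end;
			break;
		}
	}
	if (hole_start)
		printf("IO hole assumed %#lx -> %#lx\n",
		       hole_start, hole_start + hole_size - 1);
	return 0;
}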
- */ - memset(zones_size, 0, sizeof(zones_size)); - memset(zholes_size, 0, sizeof(zholes_size)); - - zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; - zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; - - free_area_init_node(0, NODE_DATA(0), zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); -} -#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ - -static struct kcore_list kcore_vmem; - -static int __init setup_kcore(void) -{ - int i; - - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; - struct kcore_list *kcore_mem; - - base = lmb.memory.region[i].base; - size = lmb.memory.region[i].size; - - /* GFP_ATOMIC to avoid might_sleep warnings during boot */ - kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); - if (!kcore_mem) - panic("mem_init: kmalloc failed\n"); - - kclist_add(kcore_mem, __va(base), size); - } - - kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); - - return 0; -} -module_init(setup_kcore); - -void __init mem_init(void) -{ -#ifdef CONFIG_NEED_MULTIPLE_NODES - int nid; -#endif - pg_data_t *pgdat; - unsigned long i; - struct page *page; - unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; - - num_physpages = max_low_pfn; /* RAM is assumed contiguous */ - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - -#ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nid) { - if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %x\n", nid); - totalram_pages += - free_all_bootmem_node(NODE_DATA(nid)); - } - } -#else - max_mapnr = num_physpages; - totalram_pages += free_all_bootmem(); -#endif - - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat_page_nr(pgdat, i); - if (PageReserved(page)) - reservedpages++; - } - } - - codesize = (unsigned long)&_etext - (unsigned long)&_stext; - initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; - datasize = (unsigned long)&_edata - (unsigned long)&__init_end; - bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; - - printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " - "%luk reserved, %luk data, %luk bss, %luk init)\n", - (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - bsssize >> 10, - initsize >> 10); - - mem_init_done = 1; - - /* Initialize the vDSO */ - vdso_init(); -} - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. 
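mem_init() above derives the code/data/init sizes purely from linker-script symbols such as _stext and __init_end. The arithmetic on its own, with ordinary variables standing in for the linker symbols (the addresses are made up):

#include <stdio.h>

int main(void)
{
	/* stand-ins for the _stext/_etext/__init_begin/__init_end/_edata symbols */
	unsigned long stext = 0xc0000000UL, etext = 0xc0400000UL;
	unsigned long init_begin = 0xc0600000UL, init_end = 0xc0680000UL;
	unsigned long edata = 0xc0700000UL;

	unsigned long codesize = etext - stext;
	unsigned long initsize = init_end - init_begin;
	unsigned long datasize = edata - init_end;

	printf("%luk code, %luk data, %luk init\n",
	       codesize >> 10, datasize >> 10, initsize >> 10);
	return 0;
}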
- */
-void flush_dcache_page(struct page *page)
-{
-	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		return;
-	/* avoid an atomic op if possible */
-	if (test_bit(PG_arch_1, &page->flags))
-		clear_bit(PG_arch_1, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
-{
-	clear_page(page);
-
-	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		return;
-	/*
-	 * We shouldn't have to do this, but some versions of glibc
-	 * require it (ld.so assumes zero filled pages are icache clean)
-	 * - Anton
-	 */
-
-	/* avoid an atomic op if possible */
-	if (test_bit(PG_arch_1, &pg->flags))
-		clear_bit(PG_arch_1, &pg->flags);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-		    struct page *pg)
-{
-	copy_page(vto, vfrom);
-
-	/*
-	 * We should be able to use the following optimisation, however
-	 * there are two problems.
-	 * Firstly a bug in some versions of binutils meant PLT sections
-	 * were not marked executable.
-	 * Secondly the first word in the GOT section is blrl, used
-	 * to establish the GOT address. Until recently the GOT was
-	 * not marked executable.
-	 * - Anton
-	 */
-#if 0
-	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
-		return;
-#endif
-
-	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		return;
-
-	/* avoid an atomic op if possible */
-	if (test_bit(PG_arch_1, &pg->flags))
-		clear_bit(PG_arch_1, &pg->flags);
-}
-
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
-			     unsigned long addr, int len)
-{
-	unsigned long maddr;
-
-	maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
-	flush_icache_range(maddr, maddr + len);
-}
-EXPORT_SYMBOL(flush_icache_user_range);
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux PTE.
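The PG_arch_1 handling above is a deferred-coherency protocol: a store that dirties a page only clears an "icache clean" bit, and the expensive flush is postponed until the page is actually handed to a user process. A two-function model, illustrative only, with one byte of state standing in for the page flag:

#include <stdio.h>

static unsigned char icache_clean;	/* models PG_arch_1 on a single page */

static void page_written(void)
{
	icache_clean = 0;		/* cheap: just mark dirty, like flush_dcache_page() */
}

static void page_given_to_user(void)
{
	if (!icache_clean) {		/* pay for the flush only when it matters */
		puts("flush dcache/icache for page");
		icache_clean = 1;
	}
}

int main(void)
{
	page_written();
	page_written();			/* repeated writes stay cheap */
	page_given_to_user();		/* one flush here */
	page_given_to_user();		/* already clean: no flush */
	return 0;
}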
- * - * This must always be called with the mm->page_table_lock held - */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, - pte_t pte) -{ - unsigned long vsid; - void *pgdir; - pte_t *ptep; - int local = 0; - cpumask_t tmp; - unsigned long flags; - - /* handle i-cache coherency */ - if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && - !cpu_has_feature(CPU_FTR_NOEXECUTE)) { - unsigned long pfn = pte_pfn(pte); - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page) - && !test_bit(PG_arch_1, &page->flags)) { - __flush_dcache_icache(page_address(page)); - set_bit(PG_arch_1, &page->flags); - } - } - } - - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ - if (!pte_young(pte)) - return; - - pgdir = vma->vm_mm->pgd; - if (pgdir == NULL) - return; - - ptep = find_linux_pte(pgdir, ea); - if (!ptep) - return; - - vsid = get_vsid(vma->vm_mm->context.id, ea); - - local_irq_save(flags); - tmp = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) - local = 1; - - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); - local_irq_restore(flags); -} - -void __iomem * reserve_phb_iospace(unsigned long size) -{ - void __iomem *virt_addr; - - if (phbs_io_bot >= IMALLOC_BASE) - panic("reserve_phb_iospace(): phb io space overflow\n"); - - virt_addr = (void __iomem *) phbs_io_bot; - phbs_io_bot += size; - - return virt_addr; -} - -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - -static const int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; - -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; - -void pgtable_cache_init(void) -{ - int i; - - BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); - BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); - BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); - BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_HWCACHE_ALIGN - | SLAB_MUST_HWCACHE_ALIGN, - zero_ctor, - NULL); - if (! pgtable_cache[i]) - panic("pgtable_cache_init(): could not create %s!\n", - name); - } -} - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, - unsigned long size, pgprot_t vma_prot) -{ - if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); - - if (!page_is_ram(addr >> PAGE_SHIFT)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); - return vma_prot; -} -EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/ppc64/mm/mmap.c b/arch/ppc64/mm/mmap.c deleted file mode 100644 index fe65f522aff..00000000000 --- a/arch/ppc64/mm/mmap.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * linux/arch/ppc64/mm/mmap.c - * - * flexible mmap layout support - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
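pgtable_cache_init() above creates one slab per page-table size and relies on zero_ctor() so that freed-and-reallocated tables always come back zeroed. The same allocator-plus-constructor pattern in userspace (illustrative only: kmem_cache_create() is replaced by malloc(), and the sizes are placeholders for PTE_TABLE_SIZE and PMD_TABLE_SIZE):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cache { const char *name; size_t size; };

static void *cache_alloc(const struct cache *c)
{
	void *p = malloc(c->size);
	if (p)
		memset(p, 0, c->size);	/* the zero_ctor() step: objects start zeroed */
	return p;
}

int main(void)
{
	struct cache caches[] = { { "pgd_pte_cache", 4096 }, { "pud_pmd_cache", 4096 } };

	for (size_t i = 0; i < 2; i++) {
		void *obj = cache_alloc(&caches[i]);
		printf("%s -> %p\n", caches[i].name, obj);
		free(obj);
	}
	return 0;
}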
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. - */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(void) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return TASK_SIZE - (gap & PAGE_MASK); -} - -static inline int mmap_is_legacy(void) -{ - /* - * Force standard allocation for 64 bit programs. - */ - if (!test_thread_flag(TIF_32BIT)) - return 1; - - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c deleted file mode 100644 index cb864b8f275..00000000000 --- a/arch/ppc64/mm/numa.c +++ /dev/null @@ -1,779 +0,0 @@ -/* - * pSeries NUMA support - * - * Copyright (C) 2002 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int numa_enabled = 1; - -static int numa_debug; -#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } - -#ifdef DEBUG_NUMA -#define ARRAY_INITIALISER -1 -#else -#define ARRAY_INITIALISER 0 -#endif - -int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] = - ARRAY_INITIALISER}; -char *numa_memory_lookup_table; -cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; -int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0}; - -struct pglist_data *node_data[MAX_NUMNODES]; -bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; -static int min_common_depth; - -/* - * We need somewhere to store start/span for each node until we have - * allocated the real node_data structures. 
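mmap_base() in the deleted mmap.c clamps the stack rlimit between MIN_GAP and MAX_GAP before subtracting it from the top of the address space, so an extreme rlimit can neither squeeze the mmap area nor consume it entirely. The clamp in isolation (TASK_SIZE here is an arbitrary placeholder):

#include <stdio.h>

#define TASK_SIZE (1UL << 46)		/* placeholder address-space top */
#define PAGE_MASK (~4095UL)
#define MIN_GAP   (128UL * 1024 * 1024)
#define MAX_GAP   (TASK_SIZE / 6 * 5)

static unsigned long mmap_base(unsigned long stack_rlimit)
{
	unsigned long gap = stack_rlimit;

	if (gap < MIN_GAP)
		gap = MIN_GAP;		/* always leave room for the stack */
	else if (gap > MAX_GAP)
		gap = MAX_GAP;		/* never give the stack almost everything */

	return TASK_SIZE - (gap & PAGE_MASK);
}

int main(void)
{
	printf("tiny rlimit -> base %#lx\n", mmap_base(4096));
	printf("huge rlimit -> base %#lx\n", mmap_base(~0UL));
	return 0;
}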
- */ -static struct { - unsigned long node_start_pfn; - unsigned long node_end_pfn; - unsigned long node_present_pages; -} init_node_data[MAX_NUMNODES] __initdata; - -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(numa_cpu_lookup_table); -EXPORT_SYMBOL(numa_memory_lookup_table); -EXPORT_SYMBOL(numa_cpumask_lookup_table); -EXPORT_SYMBOL(nr_cpus_in_node); - -static inline void map_cpu_to_node(int cpu, int node) -{ - numa_cpu_lookup_table[cpu] = node; - if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) { - cpu_set(cpu, numa_cpumask_lookup_table[node]); - nr_cpus_in_node[node]++; - } -} - -#ifdef CONFIG_HOTPLUG_CPU -static void unmap_cpu_from_node(unsigned long cpu) -{ - int node = numa_cpu_lookup_table[cpu]; - - dbg("removing cpu %lu from node %d\n", cpu, node); - - if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { - cpu_clear(cpu, numa_cpumask_lookup_table[node]); - nr_cpus_in_node[node]--; - } else { - printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", - cpu, node); - } -} -#endif /* CONFIG_HOTPLUG_CPU */ - -static struct device_node * __devinit find_cpu_node(unsigned int cpu) -{ - unsigned int hw_cpuid = get_hard_smp_processor_id(cpu); - struct device_node *cpu_node = NULL; - unsigned int *interrupt_server, *reg; - int len; - - while ((cpu_node = of_find_node_by_type(cpu_node, "cpu")) != NULL) { - /* Try interrupt server first */ - interrupt_server = (unsigned int *)get_property(cpu_node, - "ibm,ppc-interrupt-server#s", &len); - - len = len / sizeof(u32); - - if (interrupt_server && (len > 0)) { - while (len--) { - if (interrupt_server[len] == hw_cpuid) - return cpu_node; - } - } else { - reg = (unsigned int *)get_property(cpu_node, - "reg", &len); - if (reg && (len > 0) && (reg[0] == hw_cpuid)) - return cpu_node; - } - } - - return NULL; -} - -/* must hold reference to node during call */ -static int *of_get_associativity(struct device_node *dev) -{ - return (unsigned int *)get_property(dev, "ibm,associativity", NULL); -} - -static int of_node_numa_domain(struct device_node *device) -{ - int numa_domain; - unsigned int *tmp; - - if (min_common_depth == -1) - return 0; - - tmp = of_get_associativity(device); - if (tmp && (tmp[0] >= min_common_depth)) { - numa_domain = tmp[min_common_depth]; - } else { - dbg("WARNING: no NUMA information for %s\n", - device->full_name); - numa_domain = 0; - } - return numa_domain; -} - -/* - * In theory, the "ibm,associativity" property may contain multiple - * associativity lists because a resource may be multiply connected - * into the machine. This resource then has different associativity - * characteristics relative to its multiple connections. We ignore - * this for now. We also assume that all cpu and memory sets have - * their distances represented at a common level. This won't be - * true for heirarchical NUMA. - * - * In any case the ibm,associativity-reference-points should give - * the correct depth for a normal NUMA system. - * - * - Dave Hansen - */ -static int __init find_min_common_depth(void) -{ - int depth; - unsigned int *ref_points; - struct device_node *rtas_root; - unsigned int len; - - rtas_root = of_find_node_by_path("/rtas"); - - if (!rtas_root) - return -1; - - /* - * this property is 2 32-bit integers, each representing a level of - * depth in the associativity nodes. The first is for an SMP - * configuration (should be all 0's) and the second is for a normal - * NUMA configuration. 
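map_cpu_to_node() and unmap_cpu_from_node() above keep three views consistent: a per-cpu node id, a per-node cpu mask, and a per-node count. The same bookkeeping with a plain bitmask (illustrative, with small fixed sizes):

#include <stdio.h>

#define NR_CPUS   8
#define MAX_NODES 4

static int cpu_node[NR_CPUS];
static unsigned int node_cpumask[MAX_NODES];	/* bit n set => cpu n in node */
static int cpus_in_node[MAX_NODES];

static void map_cpu_to_node(int cpu, int node)
{
	cpu_node[cpu] = node;
	if (!(node_cpumask[node] & (1u << cpu))) {	/* idempotent, like the code above */
		node_cpumask[node] |= 1u << cpu;
		cpus_in_node[node]++;
	}
}

int main(void)
{
	map_cpu_to_node(0, 1);
	map_cpu_to_node(0, 1);				/* duplicate add is harmless */
	printf("node 1 mask %#x count %d\n", node_cpumask[1], cpus_in_node[1]);
	return 0;
}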
- */ - ref_points = (unsigned int *)get_property(rtas_root, - "ibm,associativity-reference-points", &len); - - if ((len >= 1) && ref_points) { - depth = ref_points[1]; - } else { - dbg("WARNING: could not find NUMA " - "associativity reference point\n"); - depth = -1; - } - of_node_put(rtas_root); - - return depth; -} - -static int __init get_mem_addr_cells(void) -{ - struct device_node *memory = NULL; - int rc; - - memory = of_find_node_by_type(memory, "memory"); - if (!memory) - return 0; /* it won't matter */ - - rc = prom_n_addr_cells(memory); - return rc; -} - -static int __init get_mem_size_cells(void) -{ - struct device_node *memory = NULL; - int rc; - - memory = of_find_node_by_type(memory, "memory"); - if (!memory) - return 0; /* it won't matter */ - rc = prom_n_size_cells(memory); - return rc; -} - -static unsigned long read_n_cells(int n, unsigned int **buf) -{ - unsigned long result = 0; - - while (n--) { - result = (result << 32) | **buf; - (*buf)++; - } - return result; -} - -/* - * Figure out to which domain a cpu belongs and stick it there. - * Return the id of the domain used. - */ -static int numa_setup_cpu(unsigned long lcpu) -{ - int numa_domain = 0; - struct device_node *cpu = find_cpu_node(lcpu); - - if (!cpu) { - WARN_ON(1); - goto out; - } - - numa_domain = of_node_numa_domain(cpu); - - if (numa_domain >= num_online_nodes()) { - /* - * POWER4 LPAR uses 0xffff as invalid node, - * dont warn in this case. - */ - if (numa_domain != 0xffff) - printk(KERN_ERR "WARNING: cpu %ld " - "maps to invalid NUMA node %d\n", - lcpu, numa_domain); - numa_domain = 0; - } -out: - node_set_online(numa_domain); - - map_cpu_to_node(lcpu, numa_domain); - - of_node_put(cpu); - - return numa_domain; -} - -static int cpu_numa_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - unsigned long lcpu = (unsigned long)hcpu; - int ret = NOTIFY_DONE; - - switch (action) { - case CPU_UP_PREPARE: - if (min_common_depth == -1 || !numa_enabled) - map_cpu_to_node(lcpu, 0); - else - numa_setup_cpu(lcpu); - ret = NOTIFY_OK; - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_UP_CANCELED: - unmap_cpu_from_node(lcpu); - break; - ret = NOTIFY_OK; -#endif - } - return ret; -} - -/* - * Check and possibly modify a memory region to enforce the memory limit. - * - * Returns the size the region should have to enforce the memory limit. - * This will either be the original value of size, a truncated value, - * or zero. If the returned value of size is 0 the region should be - * discarded as it lies wholy above the memory limit. - */ -static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) -{ - /* - * We use lmb_end_of_DRAM() in here instead of memory_limit because - * we've already adjusted it for the limit and it takes care of - * having memory holes below the limit. - */ - extern unsigned long memory_limit; - - if (! 
memory_limit) - return size; - - if (start + size <= lmb_end_of_DRAM()) - return size; - - if (start >= lmb_end_of_DRAM()) - return 0; - - return lmb_end_of_DRAM() - start; -} - -static int __init parse_numa_properties(void) -{ - struct device_node *cpu = NULL; - struct device_node *memory = NULL; - int addr_cells, size_cells; - int max_domain = 0; - long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT; - unsigned long i; - - if (numa_enabled == 0) { - printk(KERN_WARNING "NUMA disabled by user\n"); - return -1; - } - - numa_memory_lookup_table = - (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); - memset(numa_memory_lookup_table, 0, entries * sizeof(char)); - - for (i = 0; i < entries ; i++) - numa_memory_lookup_table[i] = ARRAY_INITIALISER; - - min_common_depth = find_min_common_depth(); - - dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); - if (min_common_depth < 0) - return min_common_depth; - - max_domain = numa_setup_cpu(boot_cpuid); - - /* - * Even though we connect cpus to numa domains later in SMP init, - * we need to know the maximum node id now. This is because each - * node id must have NODE_DATA etc backing it. - * As a result of hotplug we could still have cpus appear later on - * with larger node ids. In that case we force the cpu into node 0. - */ - for_each_cpu(i) { - int numa_domain; - - cpu = find_cpu_node(i); - - if (cpu) { - numa_domain = of_node_numa_domain(cpu); - of_node_put(cpu); - - if (numa_domain < MAX_NUMNODES && - max_domain < numa_domain) - max_domain = numa_domain; - } - } - - addr_cells = get_mem_addr_cells(); - size_cells = get_mem_size_cells(); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { - unsigned long start; - unsigned long size; - int numa_domain; - int ranges; - unsigned int *memcell_buf; - unsigned int len; - - memcell_buf = (unsigned int *)get_property(memory, "reg", &len); - if (!memcell_buf || len <= 0) - continue; - - ranges = memory->n_addrs; -new_range: - /* these are order-sensitive, and modify the buffer pointer */ - start = read_n_cells(addr_cells, &memcell_buf); - size = read_n_cells(size_cells, &memcell_buf); - - start = _ALIGN_DOWN(start, MEMORY_INCREMENT); - size = _ALIGN_UP(size, MEMORY_INCREMENT); - - numa_domain = of_node_numa_domain(memory); - - if (numa_domain >= MAX_NUMNODES) { - if (numa_domain != 0xffff) - printk(KERN_ERR "WARNING: memory at %lx maps " - "to invalid NUMA node %d\n", start, - numa_domain); - numa_domain = 0; - } - - if (max_domain < numa_domain) - max_domain = numa_domain; - - if (! (size = numa_enforce_memory_limit(start, size))) { - if (--ranges) - goto new_range; - else - continue; - } - - /* - * Initialize new node struct, or add to an existing one. 
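read_n_cells() above assembles a 64-bit value from one or more big-endian 32-bit device-tree cells and advances the caller's cursor, which is what lets the reg-property loop consume (address, size) pairs back to back. A standalone version (the buffer contents are invented):

#include <stdio.h>

static unsigned long long read_n_cells(int n, const unsigned int **buf)
{
	unsigned long long result = 0;

	while (n--) {
		result = (result << 32) | **buf;	/* big-endian cell order */
		(*buf)++;				/* advance the shared cursor */
	}
	return result;
}

int main(void)
{
	/* two (addr, size) entries with 2 address cells and 1 size cell each */
	const unsigned int reg[] = { 0x1, 0x00000000, 0x20000000,
	                             0x1, 0x40000000, 0x20000000 };
	const unsigned int *p = reg;

	for (int i = 0; i < 2; i++) {
		unsigned long long start = read_n_cells(2, &p);
		unsigned long long size  = read_n_cells(1, &p);
		printf("range %d: start %#llx size %#llx\n", i, start, size);
	}
	return 0;
}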
- */ - if (init_node_data[numa_domain].node_end_pfn) { - if ((start / PAGE_SIZE) < - init_node_data[numa_domain].node_start_pfn) - init_node_data[numa_domain].node_start_pfn = - start / PAGE_SIZE; - if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) > - init_node_data[numa_domain].node_end_pfn) - init_node_data[numa_domain].node_end_pfn = - (start / PAGE_SIZE) + - (size / PAGE_SIZE); - - init_node_data[numa_domain].node_present_pages += - size / PAGE_SIZE; - } else { - node_set_online(numa_domain); - - init_node_data[numa_domain].node_start_pfn = - start / PAGE_SIZE; - init_node_data[numa_domain].node_end_pfn = - init_node_data[numa_domain].node_start_pfn + - size / PAGE_SIZE; - init_node_data[numa_domain].node_present_pages = - size / PAGE_SIZE; - } - - for (i = start ; i < (start+size); i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = - numa_domain; - - if (--ranges) - goto new_range; - } - - for (i = 0; i <= max_domain; i++) - node_set_online(i); - - return 0; -} - -static void __init setup_nonnuma(void) -{ - unsigned long top_of_ram = lmb_end_of_DRAM(); - unsigned long total_ram = lmb_phys_mem_size(); - unsigned long i; - - printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); - printk(KERN_INFO "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); - - if (!numa_memory_lookup_table) { - long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT; - numa_memory_lookup_table = - (char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1)); - memset(numa_memory_lookup_table, 0, entries * sizeof(char)); - for (i = 0; i < entries ; i++) - numa_memory_lookup_table[i] = ARRAY_INITIALISER; - } - - map_cpu_to_node(boot_cpuid, 0); - - node_set_online(0); - - init_node_data[0].node_start_pfn = 0; - init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE; - init_node_data[0].node_present_pages = total_ram / PAGE_SIZE; - - for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) - numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; -} - -static void __init dump_numa_topology(void) -{ - unsigned int node; - unsigned int count; - - if (min_common_depth == -1 || !numa_enabled) - return; - - for_each_online_node(node) { - unsigned long i; - - printk(KERN_INFO "Node %d Memory:", node); - - count = 0; - - for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) { - if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) { - if (count == 0) - printk(" 0x%lx", i); - ++count; - } else { - if (count > 0) - printk("-0x%lx", i); - count = 0; - } - } - - if (count > 0) - printk("-0x%lx", i); - printk("\n"); - } - return; -} - -/* - * Allocate some memory, satisfying the lmb or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the physical address of the memory. - */ -static unsigned long careful_allocation(int nid, unsigned long size, - unsigned long align, unsigned long end) -{ - unsigned long ret = lmb_alloc_base(size, align, end); - - /* retry over all memory */ - if (!ret) - ret = lmb_alloc_base(size, align, lmb_end_of_DRAM()); - - if (!ret) - panic("numa.c: cannot allocate %lu bytes on node %d", - size, nid); - - /* - * If the memory came from a previously allocated node, we must - * retry with the bootmem allocator. 
- */ - if (pa_to_nid(ret) < nid) { - nid = pa_to_nid(ret); - ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid), - size, align, 0); - - if (!ret) - panic("numa.c: cannot allocate %lu bytes on node %d", - size, nid); - - ret = virt_to_abs(ret); - - dbg("alloc_bootmem %lx %lx\n", ret, size); - } - - return ret; -} - -void __init do_init_bootmem(void) -{ - int nid; - int addr_cells, size_cells; - struct device_node *memory = NULL; - static struct notifier_block ppc64_numa_nb = { - .notifier_call = cpu_numa_callback, - .priority = 1 /* Must run before sched domains notifier. */ - }; - - min_low_pfn = 0; - max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; - max_pfn = max_low_pfn; - - if (parse_numa_properties()) - setup_nonnuma(); - else - dump_numa_topology(); - - register_cpu_notifier(&ppc64_numa_nb); - - for_each_online_node(nid) { - unsigned long start_paddr, end_paddr; - int i; - unsigned long bootmem_paddr; - unsigned long bootmap_pages; - - start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE; - end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE; - - /* Allocate the node structure node local if possible */ - NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_paddr); - NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid)); - memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; - NODE_DATA(nid)->node_start_pfn = - init_node_data[nid].node_start_pfn; - NODE_DATA(nid)->node_spanned_pages = - end_paddr - start_paddr; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_paddr); - dbg("end_paddr = %lx\n", end_paddr); - - bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT); - - bootmem_paddr = careful_allocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_paddr); - memset(abs_to_virt(bootmem_paddr), 0, - bootmap_pages << PAGE_SHIFT); - dbg("bootmap_paddr = %lx\n", bootmem_paddr); - - init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, - start_paddr >> PAGE_SHIFT, - end_paddr >> PAGE_SHIFT); - - /* - * We need to do another scan of all memory sections to - * associate memory with the correct node. 
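careful_allocation() above tries progressively weaker placements: node-local below the node's end address, then anywhere in RAM, and it panics only when both fail. The fallback ladder in miniature (the allocators here are fakes that stand in for lmb_alloc_base()):

#include <stdio.h>
#include <stdlib.h>

/* fake allocator: returns 0 on failure, like lmb_alloc_base() */
static unsigned long alloc_below(unsigned long size, unsigned long limit)
{
	return limit >= size ? limit - size : 0;	/* pretend the highest fit wins */
}

static unsigned long careful_allocation(int nid, unsigned long size,
					unsigned long node_end, unsigned long ram_end)
{
	unsigned long ret = alloc_below(size, node_end);	/* prefer node-local */

	if (!ret)
		ret = alloc_below(size, ram_end);		/* retry over all memory */
	if (!ret) {
		fprintf(stderr, "cannot allocate %lu bytes on node %d\n", size, nid);
		exit(1);					/* panic() in the kernel */
	}
	return ret;
}

int main(void)
{
	/* node has no room (node_end = 0), so the global fallback is used */
	printf("%#lx\n", careful_allocation(1, 4096, 0, 1UL << 30));
	return 0;
}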
- */
-	addr_cells = get_mem_addr_cells();
-	size_cells = get_mem_size_cells();
-	memory = NULL;
-	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
-		unsigned long mem_start, mem_size;
-		int numa_domain, ranges;
-		unsigned int *memcell_buf;
-		unsigned int len;
-
-		memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
-		if (!memcell_buf || len <= 0)
-			continue;
-
-		ranges = memory->n_addrs;	/* ranges in cell */
-new_range:
-		mem_start = read_n_cells(addr_cells, &memcell_buf);
-		mem_size = read_n_cells(size_cells, &memcell_buf);
-		if (numa_enabled) {
-			numa_domain = of_node_numa_domain(memory);
-			if (numa_domain >= MAX_NUMNODES)
-				numa_domain = 0;
-		} else
-			numa_domain = 0;
-
-		if (numa_domain != nid)
-			continue;
-
-		mem_size = numa_enforce_memory_limit(mem_start, mem_size);
-		if (mem_size) {
-			dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
-			free_bootmem_node(NODE_DATA(nid), mem_start, mem_size);
-		}
-
-		if (--ranges)		/* process all ranges in cell */
-			goto new_range;
-	}
-
-	/*
-	 * Mark reserved regions on this node
-	 */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long physbase = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
-
-		if (pa_to_nid(physbase) != nid &&
-		    pa_to_nid(physbase+size-1) != nid)
-			continue;
-
-		if (physbase < end_paddr &&
-		    (physbase+size) > start_paddr) {
-			/* overlaps */
-			if (physbase < start_paddr) {
-				size -= start_paddr - physbase;
-				physbase = start_paddr;
-			}
-
-			if (size > end_paddr - physbase)
-				size = end_paddr - physbase;
-
-			dbg("reserve_bootmem %lx %lx\n", physbase,
-			    size);
-			reserve_bootmem_node(NODE_DATA(nid), physbase,
-					     size);
-		}
-	}
-	/*
-	 * This loop may look familiar, but we have to do it again
-	 * after marking our reserved memory to mark memory present
-	 * for sparsemem.
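The reserved-region loop above is interval arithmetic: keep only the part of [physbase, physbase+size) that overlaps the node's [start_paddr, end_paddr). The clamping on its own (illustrative values):

#include <stdio.h>

/* clamp [base, base+size) to [lo, hi); returns 0 if there is no overlap */
static unsigned long clamp_region(unsigned long *base, unsigned long size,
				  unsigned long lo, unsigned long hi)
{
	if (*base >= hi || *base + size <= lo)
		return 0;
	if (*base < lo) {			/* trim the front */
		size -= lo - *base;
		*base = lo;
	}
	if (size > hi - *base)			/* trim the tail */
		size = hi - *base;
	return size;
}

int main(void)
{
	unsigned long base = 0x0f00;
	unsigned long size = clamp_region(&base, 0x2000, 0x1000, 0x2000);
	printf("clipped to base %#lx size %#lx\n", base, size);	/* 0x1000, 0x1000 */
	return 0;
}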
- */ - addr_cells = get_mem_addr_cells(); - size_cells = get_mem_size_cells(); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { - unsigned long mem_start, mem_size; - int numa_domain, ranges; - unsigned int *memcell_buf; - unsigned int len; - - memcell_buf = (unsigned int *)get_property(memory, "reg", &len); - if (!memcell_buf || len <= 0) - continue; - - ranges = memory->n_addrs; /* ranges in cell */ -new_range2: - mem_start = read_n_cells(addr_cells, &memcell_buf); - mem_size = read_n_cells(size_cells, &memcell_buf); - if (numa_enabled) { - numa_domain = of_node_numa_domain(memory); - if (numa_domain >= MAX_NUMNODES) - numa_domain = 0; - } else - numa_domain = 0; - - if (numa_domain != nid) - continue; - - mem_size = numa_enforce_memory_limit(mem_start, mem_size); - memory_present(numa_domain, mem_start >> PAGE_SHIFT, - (mem_start + mem_size) >> PAGE_SHIFT); - - if (--ranges) /* process all ranges in cell */ - goto new_range2; - } - - } -} - -void __init paging_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; - int nid; - - memset(zones_size, 0, sizeof(zones_size)); - memset(zholes_size, 0, sizeof(zholes_size)); - - for_each_online_node(nid) { - unsigned long start_pfn; - unsigned long end_pfn; - - start_pfn = init_node_data[nid].node_start_pfn; - end_pfn = init_node_data[nid].node_end_pfn; - - zones_size[ZONE_DMA] = end_pfn - start_pfn; - zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - - init_node_data[nid].node_present_pages; - - dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid, - zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]); - - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start_pfn, zholes_size); - } -} - -static int __init early_numa(char *p) -{ - if (!p) - return 0; - - if (strstr(p, "off")) - numa_enabled = 0; - - if (strstr(p, "debug")) - numa_debug = 1; - - return 0; -} -early_param("numa", early_numa); diff --git a/arch/ppc64/mm/slb.c b/arch/ppc64/mm/slb.c deleted file mode 100644 index 0473953f6a3..00000000000 --- a/arch/ppc64/mm/slb.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * PowerPC64 SLB support. - * - * Copyright (C) 2004 David Gibson , IBM - * Based on earlier code writteh by: - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * Copyright (C) 2002 Anton Blanchard , IBM - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include - -extern void slb_allocate(unsigned long ea); - -static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) -{ - return (ea & ESID_MASK) | SLB_ESID_V | slot; -} - -static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) -{ - return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; -} - -static inline void create_slbe(unsigned long ea, unsigned long flags, - unsigned long entry) -{ - asm volatile("slbmte %0,%1" : - : "r" (mk_vsid_data(ea, flags)), - "r" (mk_esid_data(ea, entry)) - : "memory" ); -} - -static void slb_flush_and_rebolt(void) -{ - /* If you change this make sure you change SLB_NUM_BOLTED - * appropriately too. 
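mk_esid_data() and mk_vsid_data() in the deleted slb.c are pure bit packing: the ESID shares a doubleword with a valid bit and the slot index, and the VSID is shifted up over the flag bits. A self-contained model; the constants below follow the 2.6-era headers but should be treated as illustrative, and the code assumes a 64-bit unsigned long as on ppc64:

#include <stdio.h>

#define ESID_MASK      0xfffffffff0000000UL	/* top bits select the 256MB segment */
#define SLB_ESID_V     0x0000000008000000UL	/* entry-valid bit */
#define SLB_VSID_SHIFT 12			/* VSID sits above the flag bits */

static unsigned long mk_esid_data(unsigned long ea, unsigned long slot)
{
	return (ea & ESID_MASK) | SLB_ESID_V | slot;
}

static unsigned long mk_vsid_data(unsigned long vsid, unsigned long flags)
{
	return (vsid << SLB_VSID_SHIFT) | flags;
}

int main(void)
{
	printf("esid data %#lx\n", mk_esid_data(0xc000000000001234UL, 2));
	printf("vsid data %#lx\n", mk_vsid_data(0xabcdeUL, 0x400));
	return 0;
}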
*/ - unsigned long ksp_flags = SLB_VSID_KERNEL; - unsigned long ksp_esid_data; - - WARN_ON(!irqs_disabled()); - - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - ksp_flags |= SLB_VSID_L; - - ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); - if ((ksp_esid_data & ESID_MASK) == KERNELBASE) - ksp_esid_data &= ~SLB_ESID_V; - - /* We need to do this all in asm, so we're sure we don't touch - * the stack between the slbia and rebolting it. */ - asm volatile("isync\n" - "slbia\n" - /* Slot 1 - first VMALLOC segment */ - "slbmte %0,%1\n" - /* Slot 2 - kernel stack */ - "slbmte %2,%3\n" - "isync" - :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), - "r"(mk_esid_data(VMALLOCBASE, 1)), - "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), - "r"(ksp_esid_data) - : "memory"); -} - -/* Flush all user entries from the segment table of the current processor. */ -void switch_slb(struct task_struct *tsk, struct mm_struct *mm) -{ - unsigned long offset = get_paca()->slb_cache_ptr; - unsigned long esid_data = 0; - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - if (offset <= SLB_CACHE_ENTRIES) { - int i; - asm volatile("isync" : : : "memory"); - for (i = 0; i < offset; i++) { - esid_data = ((unsigned long)get_paca()->slb_cache[i] - << SID_SHIFT) | SLBIE_C; - asm volatile("slbie %0" : : "r" (esid_data)); - } - asm volatile("isync" : : : "memory"); - } else { - slb_flush_and_rebolt(); - } - - /* Workaround POWER5 < DD2.1 issue */ - if (offset == 1 || offset > SLB_CACHE_ENTRIES) - asm volatile("slbie %0" : : "r" (esid_data)); - - get_paca()->slb_cache_ptr = 0; - get_paca()->context = mm->context; - - /* - * preload some userspace segments into the SLB. - */ - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - if (pc >= KERNELBASE) - return; - slb_allocate(pc); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - if (stack >= KERNELBASE) - return; - slb_allocate(stack); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - if (unmapped_base >= KERNELBASE) - return; - slb_allocate(unmapped_base); -} - -void slb_initialize(void) -{ - /* On iSeries the bolted entries have already been set up by - * the hypervisor from the lparMap data in head.S */ -#ifndef CONFIG_PPC_ISERIES - unsigned long flags = SLB_VSID_KERNEL; - - /* Invalidate the entire SLB (even slot 0) & all the ERATS */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - flags |= SLB_VSID_L; - - asm volatile("isync":::"memory"); - asm volatile("slbmte %0,%0"::"r" (0) : "memory"); - asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, flags, 0); - create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); - /* We don't bolt the stack for the time being - we're in boot, - * so the stack is in the bolted segment. By the time it goes - * elsewhere, we'll call _switch() which will bolt in the new - * one. 
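switch_slb() above keeps a small per-cpu cache of user ESIDs so the common case can invalidate just those entries with slbie; only when the cache has overflowed does it fall back to the full slbia-and-rebolt path. The decision structure in miniature (illustrative; the SLB instructions are replaced by prints):

#include <stdio.h>

#define SLB_CACHE_ENTRIES 8

static void switch_context(const unsigned long *cache, unsigned long ptr)
{
	if (ptr <= SLB_CACHE_ENTRIES) {
		for (unsigned long i = 0; i < ptr; i++)
			printf("slbie esid %#lx\n", cache[i]);	/* targeted invalidate */
	} else {
		puts("slbia + rebolt kernel entries");		/* cache overflowed */
	}
}

int main(void)
{
	unsigned long cache[SLB_CACHE_ENTRIES] = { 0x10, 0x11, 0x12 };

	switch_context(cache, 3);			/* cheap path */
	switch_context(cache, SLB_CACHE_ENTRIES + 1);	/* overflow path */
	return 0;
}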
*/ - asm volatile("isync":::"memory"); -#endif - - get_paca()->stab_rr = SLB_NUM_BOLTED; -} diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S deleted file mode 100644 index a3a03da503b..00000000000 --- a/arch/ppc64/mm/slb_low.S +++ /dev/null @@ -1,151 +0,0 @@ -/* - * arch/ppc64/mm/slb_low.S - * - * Low-level SLB routines - * - * Copyright (C) 2004 David Gibson , IBM - * - * Based on earlier C version: - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * Copyright (C) 2002 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* void slb_allocate(unsigned long ea); - * - * Create an SLB entry for the given EA (user or kernel). - * r3 = faulting address, r13 = PACA - * r9, r10, r11 are clobbered by this function - * No other registers are examined or changed. - */ -_GLOBAL(slb_allocate) - /* - * First find a slot, round robin. Previously we tried to find - * a free slot first but that took too long. Unfortunately we - * dont have any LRU information to help us choose a slot. - */ -#ifdef CONFIG_PPC_ISERIES - /* - * On iSeries, the "bolted" stack segment can be cast out on - * shared processor switch so we need to check for a miss on - * it and restore it to the right slot. - */ - ld r9,PACAKSAVE(r13) - clrrdi r9,r9,28 - clrrdi r11,r3,28 - li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ - cmpld r9,r11 - beq 3f -#endif /* CONFIG_PPC_ISERIES */ - - ld r10,PACASTABRR(r13) - addi r10,r10,1 - /* use a cpu feature mask if we ever change our slb size */ - cmpldi r10,SLB_NUM_ENTRIES - - blt+ 4f - li r10,SLB_NUM_BOLTED - -4: - std r10,PACASTABRR(r13) -3: - /* r3 = faulting address, r10 = entry */ - - srdi r9,r3,60 /* get region */ - srdi r3,r3,28 /* get esid */ - cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ - - rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ - oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ - - /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ - - blt cr7,0f /* user or kernel? */ - - /* kernel address: proto-VSID = ESID */ - /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but - * this code will generate the protoVSID 0xfffffffff for the - * top segment. That's ok, the scramble below will translate - * it to VSID 0, which is reserved as a bad VSID - one which - * will never have any pages in it. */ - li r11,SLB_VSID_KERNEL -BEGIN_FTR_SECTION - bne cr7,9f - li r11,(SLB_VSID_KERNEL|SLB_VSID_L) -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) - b 9f - -0: /* user address: proto-VSID = context<<15 | ESID */ - srdi. 
r9,r3,USER_ESID_BITS - bne- 8f /* invalid ea bits set */ - -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) - srd r9,r9,r11 - lhz r11,PACALOWHTLBAREAS(r13) - srd r11,r11,r3 - or r9,r9,r11 -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) -#endif /* CONFIG_HUGETLB_PAGE */ - - li r11,SLB_VSID_USER - -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) -#endif /* CONFIG_HUGETLB_PAGE */ - - ld r9,PACACONTEXTID(r13) - rldimi r3,r9,USER_ESID_BITS,0 - -9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ - ASM_VSID_SCRAMBLE(r3,r9) - - rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ - - /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - */ - slbmte r11,r10 - - bgelr cr7 /* we're done for kernel addresses */ - - /* Update the slb cache */ - lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ - cmpldi r3,SLB_CACHE_ENTRIES - bge 1f - - /* still room in the slb cache */ - sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ - rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ - add r11,r11,r13 /* r11 = (u16 *)paca + offset */ - sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ - addi r3,r3,1 /* offset++ */ - b 2f -1: /* offset >= SLB_CACHE_ENTRIES */ - li r3,SLB_CACHE_ENTRIES+1 -2: - sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ - blr - -8: /* invalid EA */ - li r3,0 /* BAD_VSID */ - li r11,SLB_VSID_USER /* flags don't much matter */ - b 9b diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c deleted file mode 100644 index 1b83f002bf2..00000000000 --- a/arch/ppc64/mm/stab.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard , IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct stab_entry { - unsigned long esid_data; - unsigned long vsid_data; -}; - -/* Both the segment table and SLB code uses the following cache */ -#define NR_STAB_CACHE_ENTRIES 8 -DEFINE_PER_CPU(long, stab_cache_ptr); -DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long esid_data, vsid_data; - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; - - vsid_data = vsid << STE_VSID_SHIFT; - esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; - if (! kernel_segment) - esid_data |= STE_ESID_KS; - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. 
*/ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid_data; - asm volatile("eieio":::"memory"); - ste->esid_data = esid_data; - return (global_entry | entry); - } - } - /* Now search the secondary group. */ - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - old_esid = castout_ste->esid_data >> SID_SHIFT; - castout_ste->esid_data = 0; /* Invalidate old entry */ - - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->vsid_data = vsid_data; - asm volatile("eieio" : : : "memory"); /* Order update */ - castout_ste->esid_data = esid_data; - - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -/* - * Allocate a segment table entry for the given ea and mm - */ -static int __ste_allocate(unsigned long ea, struct mm_struct *mm) -{ - unsigned long vsid; - unsigned char stab_entry; - unsigned long offset; - - /* Kernel or user address? */ - if (ea >= KERNELBASE) { - vsid = get_kernel_vsid(ea); - } else { - if ((ea >= TASK_SIZE_USER64) || (! mm)) - return 1; - - vsid = get_vsid(mm->context.id, ea); - } - - stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - - if (ea < KERNELBASE) { - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; - - /* Order update */ - asm volatile("sync":::"memory"); - } - - return 0; -} - -int ste_allocate(unsigned long ea) -{ - return __ste_allocate(ea, current->mm); -} - -/* - * Do the segment table work for a context switch: flush all user - * entries from the table, then preload some probably useful entries - * for the new task - */ -void switch_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; - struct stab_entry *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->esid_data = 0; /* invalidate entry */ - } - } else { - unsigned long entry; - - /* Invalidate all entries. 
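When both STE groups are full, make_ste() above evicts by round robin while refusing to cast out the first kernel segment. The selection policy alone, over the 16 candidate entries, is sketched below (KERNEL_SLOT is a simplification of the real ESID_MASK check):

#include <stdio.h>

#define KERNEL_SLOT 0	/* models "never cast out the first kernel segment" */

static int pick_castout(int *rr)
{
	for (int i = 0; i < 16; i++) {
		int cand = (*rr + i) & 0xf;
		if (cand != KERNEL_SLOT) {	/* bolted entry is skipped */
			*rr = (cand + 1) & 0xf;	/* advance the round-robin pointer */
			return cand;
		}
	}
	return -1;
}

int main(void)
{
	int rr = 15;

	for (int n = 0; n < 3; n++)
		printf("evict slot %d\n", pick_castout(&rr));	/* 15, then 1, 2: slot 0 is skipped */
	return 0;
}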
*/ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (PAGE_SIZE / sizeof(struct stab_entry)); - entry++, ste++) { - unsigned long ea; - ea = ste->esid_data & ESID_MASK; - if (ea < KERNELBASE) { - ste->esid_data = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - /* Now preload some entries for the new task */ - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -extern void slb_initialize(void); - -/* - * Allocate segment tables for secondary CPUs. These must all go in - * the first (bolted) segment, so that do_stab_bolted won't get a - * recursive segment miss on the segment table itself. - */ -void stabs_alloc(void) -{ - int cpu; - - if (cpu_has_feature(CPU_FTR_SLB)) - return; - - for_each_cpu(cpu) { - unsigned long newstab; - - if (cpu == 0) - continue; /* stab for CPU 0 is statically allocated */ - - newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1< - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); - -/* This is declared as we are using the more or less generic - * include/asm-ppc64/tlb.h file -- tgall - */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - -struct pte_freelist_batch -{ - struct rcu_head rcu; - unsigned int index; - pgtable_free_t tables[0]; -}; - -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - -#define PTE_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ - / sizeof(pgtable_free_t)) - -#ifdef CONFIG_SMP -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} -#endif - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). 
- */ -static void pgtable_free_now(pgtable_free_t pgf) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 0, 1); - - pgtable_free(pgf); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) - pgtable_free(batch->tables[i]); - - free_page((unsigned long)batch); -} - -static void pte_free_submit(struct pte_freelist_batch *batch) -{ - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); -} - -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) -{ - /* This is safe as we are holding page_table_lock */ - cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pgtable_free(pgf); - return; - } - - if (*batchp == NULL) { - *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); - if (*batchp == NULL) { - pgtable_free_now(pgf); - return; - } - (*batchp)->index = 0; - } - (*batchp)->tables[(*batchp)->index++] = pgf; - if ((*batchp)->index == PTE_FREELIST_SIZE) { - pte_free_submit(*batchp); - *batchp = NULL; - } -} - -/* - * Update the MMU hash table to correspond with a change to - * a Linux PTE. If wrprot is true, it is permissible to - * change the existing HPTE to read-only rather than removing it - * (if we remove it we should clear the _PTE_HPTEFLAGS bits). - */ -void hpte_update(struct mm_struct *mm, unsigned long addr, - unsigned long pte, int wrprot) -{ - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long vsid; - int i; - - i = batch->index; - - /* - * This can happen when we are in the middle of a TLB batch and - * we encounter memory pressure (eg copy_page_range when it tries - * to allocate a new pte). If we have to reclaim memory and end - * up scanning and resetting referenced bits then our batch context - * will change mid stream. 
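pgtable_free_tlb() above batches page-table pages into a page-sized array and frees the whole batch from an RCU callback, so CPUs still walking those tables finish first. The batching core without the kernel types (illustrative; the RCU deferral is replaced by a direct call, and BATCH_SIZE is shrunk for the demo):

#include <stdio.h>
#include <stdlib.h>

#define BATCH_SIZE 4	/* PTE_FREELIST_SIZE holds a few hundred in the real code */

struct batch { int index; void *tables[BATCH_SIZE]; };

static struct batch *cur;	/* per-cpu in the kernel */

static void batch_submit(struct batch *b)
{
	/* stands in for call_rcu(): free only after readers are done */
	for (int i = 0; i < b->index; i++)
		free(b->tables[i]);
	free(b);
}

static void table_free(void *pg)
{
	if (!cur) {
		cur = calloc(1, sizeof(*cur));
		if (!cur) { free(pg); return; }	/* out of memory: free synchronously */
	}
	cur->tables[cur->index++] = pg;
	if (cur->index == BATCH_SIZE) {		/* full batch goes to the deferred path */
		batch_submit(cur);
		cur = NULL;
	}
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		table_free(malloc(64));
	if (cur) { batch_submit(cur); cur = NULL; }	/* the pte_free_finish() step */
	return 0;
}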
- */ - if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { - flush_tlb_pending(); - i = 0; - } - if (i == 0) { - batch->mm = mm; - batch->large = pte_huge(pte); - } - if (addr < KERNELBASE) { - vsid = get_vsid(mm->context.id, addr); - WARN_ON(vsid == 0); - } else - vsid = get_kernel_vsid(addr); - batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); - batch->pte[i] = __pte(pte); - batch->index = ++i; - if (i >= PPC64_TLB_BATCH_NR) - flush_tlb_pending(); -} - -void __flush_tlb_pending(struct ppc64_tlb_batch *batch) -{ - int i; - int cpu; - cpumask_t tmp; - int local = 0; - - BUG_ON(in_interrupt()); - - cpu = get_cpu(); - i = batch->index; - tmp = cpumask_of_cpu(cpu); - if (cpus_equal(batch->mm->cpu_vm_mask, tmp)) - local = 1; - - if (i == 1) - flush_hash_page(batch->vaddr[0], batch->pte[0], local); - else - flush_hash_range(i, local); - batch->index = 0; - put_cpu(); -} - -void pte_free_finish(void) -{ - /* This is safe as we are holding page_table_lock */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (*batchp == NULL) - return; - pte_free_submit(*batchp); - *batchp = NULL; -} -- cgit v1.2.3-70-g09d2 From b3b8dc6c07cecc1f8d52d03f677206bdf9f794c9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 10 Oct 2005 22:20:10 +1000 Subject: powerpc: Use reg.h instead of processor.h when we just want reg names Now that the register names and bit definitions are all in reg.h, use that instead of processor.h in assembly code in a few places. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/fpu.S | 2 +- arch/powerpc/kernel/head_32.S | 6 +++--- arch/powerpc/kernel/idle_6xx.S | 2 +- arch/powerpc/kernel/vector.S | 2 +- arch/powerpc/lib/string.S | 5 ----- arch/powerpc/mm/hash_low_32.S | 2 +- 6 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 04e95e810b2..563d445ff58 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index d9dbbd42674..108e78ef387 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -23,7 +23,7 @@ */ #include -#include +#include #include #include #include @@ -55,8 +55,8 @@ 1: .text - .stabs "arch/ppc/kernel/",N_SO,0,0,0f - .stabs "head.S",N_SO,0,0,0f + .stabs "arch/powerpc/kernel/",N_SO,0,0,0f + .stabs "head_32.S",N_SO,0,0,0f 0: .globl _stext _stext: diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 1a2194cf682..444fdcc769f 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 12cb90bc209..66b3d03c5fa 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,6 +1,6 @@ #include #include -#include +#include /* * The routines below are in assembler so we can closely control the diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 15d40e9ef8b..b9ca84ed892 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -13,11 +13,6 @@ #include #include - .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "string.S",N_SO,0,0,0f -0: - .section __ex_table,"a" #ifdef CONFIG_PPC64 .align 3 diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S 
index 57278a8dd13..12ccd7155ba 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -24,7 +24,7 @@
 */
 #include
-#include
+#include
 #include
 #include
 #include
-- 
cgit v1.2.3-70-g09d2


From 3a5f8c5f788d68e325d9fe3c26f4df5a5aee838a Mon Sep 17 00:00:00 2001
From: Stephen Rothwell 
Date: Tue, 11 Oct 2005 19:40:20 +1000
Subject: powerpc: make iSeries boot again

On ARCH=ppc64 we were getting htab_hash_mask recalculated to the
correct value for our particular machine by accident.  In the merge
tree, that code was commented out, so htab_hash_mask was being
corrupted.  We now set ppc64_pft_size instead, which gets
htab_hash_mask calculated correctly for us later.  We should put an
ibm,pft-size property in the device tree at some point.

Also set -mno-minimal-toc in some makefiles.
Allow iSeries to configure PROC_DEVICETREE.

Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/Kconfig                    | 2 +-
 arch/powerpc/mm/Makefile                | 4 ++++
 arch/powerpc/platforms/iseries/Makefile | 2 ++
 arch/powerpc/platforms/iseries/setup.c  | 6 +-----
 4 files changed, 8 insertions(+), 6 deletions(-)
(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 330376b74c8..cd55c9b0f7e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -570,7 +570,7 @@ config SCHED_SMT
 
 config PROC_DEVICETREE
 	bool "Support for Open Firmware device tree in /proc"
-	depends on PPC_OF && PROC_FS
+	depends on (PPC_OF || PPC_ISERIES) && PROC_FS
 	help
 	  This option adds a device-tree directory under /proc which contains
 	  an image of the device tree that the kernel copies from Open
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 612bc4ec72b..14b2ffb6c2d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -2,6 +2,10 @@
 # Makefile for the linux ppc-specific parts of the memory manager.
# +ifeq ($(CONFIG_PPC64),y) +EXTRA_CFLAGS += -mno-minimal-toc +endif + obj-y := fault.o mem.o lmb.o obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ tlb_32.o diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile index 18bf4007756..127b465308b 100644 --- a/arch/powerpc/platforms/iseries/Makefile +++ b/arch/powerpc/platforms/iseries/Makefile @@ -1,3 +1,5 @@ +EXTRA_CFLAGS += -mno-minimal-toc + obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o mf.o lpevents.o \ hvcall.o proc.o htab.o iommu.o misc.o obj-$(CONFIG_PCI) += pci.o irq.o vpdinfo.o diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index c3e532b766e..b6cf050a8c2 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -430,7 +430,6 @@ static void __init build_iSeries_Memory_Map(void) u32 loadAreaFirstChunk, loadAreaLastChunk, loadAreaSize; u32 nextPhysChunk; u32 hptFirstChunk, hptLastChunk, hptSizeChunks, hptSizePages; - u32 num_ptegs; u32 totalChunks,moreChunks; u32 currChunk, thisChunk, absChunk; u32 currDword; @@ -493,10 +492,7 @@ static void __init build_iSeries_Memory_Map(void) printk("HPT absolute addr = %016lx, size = %dK\n", chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); - /* Fill in the hashed page table hash mask */ - num_ptegs = hptSizePages * - (PAGE_SIZE / (sizeof(hpte_t) * HPTES_PER_GROUP)); - htab_hash_mask = num_ptegs - 1; + ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE); /* * The actual hashed page table is in the hypervisor, -- cgit v1.2.3-70-g09d2 From 3eac8c69d1ac1266327f4e29deb23716a12c6d30 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 12 Oct 2005 16:58:53 +1000 Subject: powerpc: Move default hash table size calculation to hash_utils_64.c We weren't computing the size of the hash table correctly on iSeries because the relevant code in prom.c was #ifdef CONFIG_PPC_PSERIES. This moves the code to hash_utils_64.c, makes it unconditional, and cleans it up a bit. 
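As a sanity check on the rule being moved, the arithmetic can be reproduced standalone. The sketch below is illustrative only (the function name is invented, and a plain loop stands in for the kernel's __ilog2() and max() helpers): memory is rounded up to a power of two, one PTEG is allocated per two 4 KB pages with a floor of 2^11 PTEGs, and each PTEG occupies 128 bytes, so 1 GB of RAM yields a 16 MB hash table.

    #include <stdio.h>

    /* Illustrative re-statement of the sizing rule, not the kernel code. */
    static unsigned long htab_size_for(unsigned long mem_bytes)
    {
            unsigned long rnd = 1, ptegs;

            while (rnd < mem_bytes)         /* round up to a power of 2 */
                    rnd <<= 1;

            ptegs = rnd >> (12 + 1);        /* (# of 4 KB pages) / 2 */
            if (ptegs < (1UL << 11))        /* floor of 2^11 PTEGs */
                    ptegs = 1UL << 11;

            return ptegs << 7;              /* 128 bytes per PTEG */
    }

    int main(void)
    {
            printf("%lu\n", htab_size_for(1UL << 30));  /* prints 16777216 */
            return 0;
    }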
Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom.c | 20 -------------------- arch/powerpc/mm/hash_utils_64.c | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ce0dff1caa8..c8d288457b4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1323,26 +1323,6 @@ void __init early_init_devtree(void *params) */ scan_flat_dt(early_init_dt_scan_cpus, NULL); -#ifdef CONFIG_PPC_PSERIES - /* If hash size wasn't obtained above, we calculate it now based on - * the total RAM size - */ - if (ppc64_pft_size == 0) { - unsigned long rnd_mem_size, pteg_count; - - /* round mem_size up to next power of 2 */ - rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); - if (rnd_mem_size < systemcfg->physicalMemorySize) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); - - ppc64_pft_size = __ilog2(pteg_count << 7); - } - - DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); -#endif DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 35dd93eeaf4..6e9e05cce02 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -155,6 +155,27 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end, } } +static unsigned long get_hashtable_size(void) +{ + unsigned long rnd_mem_size, pteg_count; + + /* If hash size wasn't obtained in prom.c, we calculate it now based on + * the total RAM size + */ + if (ppc64_pft_size) + return 1UL << ppc64_pft_size; + + /* round mem_size up to next power of 2 */ + rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); + if (rnd_mem_size < systemcfg->physicalMemorySize) + rnd_mem_size <<= 1; + + /* # pages / 2 */ + pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + + return pteg_count << 7; +} + void __init htab_initialize(void) { unsigned long table, htab_size_bytes; @@ -170,7 +191,7 @@ void __init htab_initialize(void) * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. */ - htab_size_bytes = 1UL << ppc64_pft_size; + htab_size_bytes = get_hashtable_size(); pteg_count = htab_size_bytes >> 7; /* For debug, make the HTAB 1/8 as big as it normally would be. */ -- cgit v1.2.3-70-g09d2 From 30cd4a4e9c25e154ba087848a839bd0c6d024092 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 17 Oct 2005 19:20:46 +1000 Subject: powerpc: Initialize btext subsystem later, after prom_init We were initializing the btext stuff from prom_init(), thus breaking the rule that all communication between prom_init() and the rest of the kernel has to be via the flattened device tree. This removes the btext initialization calls from prom_init() and initializes it instead after the device tree is unflattened. It would be nice to do it earlier, but that needs some more infrastructure to find the properties we need in the flattened device tree. 
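The rule being enforced here is that prom_init() may only publish information as device-tree properties, which the rest of the kernel reads back once the tree is unflattened. A hypothetical late consumer of the display node might look like the sketch below (the function name is invented; it assumes the device-tree accessors of the period, of_find_node_by_type(), get_property() and of_node_put(), and omits error handling and the hand-off to btext itself):

    static void __init example_find_boot_display(void)
    {
            struct device_node *np;
            u32 *width, *height;

            /* walk the now-unflattened tree instead of asking OF directly */
            np = of_find_node_by_type(NULL, "display");
            if (np == NULL)
                    return;

            width = (u32 *)get_property(np, "width", NULL);
            height = (u32 *)get_property(np, "height", NULL);
            if (width != NULL && height != NULL) {
                    /* pass *width x *height on to the btext engine here */
            }
            of_node_put(np);
    }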
Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/head_32.S | 28 ------------------- arch/powerpc/kernel/prom_init.c | 59 ----------------------------------------- arch/powerpc/kernel/setup_32.c | 16 +++-------- arch/powerpc/mm/init_32.c | 7 ----- 4 files changed, 4 insertions(+), 106 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index d9b063f567e..7ef9a3e3002 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -153,9 +153,6 @@ __after_mmu_off: bl flush_tlbs bl initial_bats -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) - bl setup_disp_bat -#endif /* * Call setup_cpu for CPU 0 and initialize 6xx Idle @@ -1297,31 +1294,6 @@ initial_bats: isync blr -#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) -setup_disp_bat: - /* - * setup the display bat prepared for us in prom.c - */ - mflr r8 - bl reloc_offset - mtlr r8 - addis r8,r3,disp_BAT@ha - addi r8,r8,disp_BAT@l - lwz r11,0(r8) - lwz r8,4(r8) - mfspr r9,SPRN_PVR - rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ - cmpwi 0,r9,1 - beq 1f - mtspr SPRN_DBAT3L,r8 - mtspr SPRN_DBAT3U,r11 - blr -1: mtspr SPRN_IBAT3L,r8 - mtspr SPRN_IBAT3U,r11 - blr - -#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ - #ifdef CONFIG_8260 /* Jump into the system reset for the rom. diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 911a803f27d..d9130c83903 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1404,62 +1404,6 @@ static int __init prom_find_machine_type(void) #endif } -static int __init setup_disp(phandle dp) -{ -#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) - int width = 640, height = 480, depth = 8, pitch; - unsigned address; - u32 addrs[8][5]; - int i, naddrs; - char name[32]; - char *getprop = "getprop"; - - prom_printf("Initializing screen: "); - - memset(name, 0, sizeof(name)); - call_prom(getprop, 4, 1, dp, "name", name, sizeof(name)); - name[sizeof(name)-1] = 0; - prom_print(name); - prom_print("\n"); - call_prom(getprop, 4, 1, dp, "width", &width, sizeof(width)); - call_prom(getprop, 4, 1, dp, "height", &height, sizeof(height)); - call_prom(getprop, 4, 1, dp, "depth", &depth, sizeof(depth)); - pitch = width * ((depth + 7) / 8); - call_prom(getprop, 4, 1, dp, "linebytes", - &pitch, sizeof(pitch)); - if (pitch == 1) - pitch = 0x1000; /* for strange IBM display */ - address = 0; - call_prom(getprop, 4, 1, dp, "address", &address, sizeof(address)); - if (address == 0) { - /* look for an assigned address with a size of >= 1MB */ - naddrs = call_prom(getprop, 4, 1, dp, "assigned-addresses", - addrs, sizeof(addrs)); - naddrs /= 20; - for (i = 0; i < naddrs; ++i) { - if (addrs[i][4] >= (1 << 20)) { - address = addrs[i][2]; - /* use the BE aperture if possible */ - if (addrs[i][4] >= (16 << 20)) - address += (8 << 20); - break; - } - } - if (address == 0) { - prom_print("Failed to get address\n"); - return 0; - } - } - /* kludge for valkyrie */ - if (strcmp(name, "valkyrie") == 0) - address += 0x1000; - - prom_printf("\n\n\n\naddress = %x\n", address); - btext_setup_display(width, height, depth, pitch, address); -#endif /* CONFIG_BOOTX_TEXT && CONFIG_PPC32 */ - return 1; -} - static int __init prom_set_color(ihandle ih, int i, int r, int g, int b) { return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r); @@ -1479,7 +1423,6 @@ static void __init prom_check_displays(void) phandle node; ihandle ih; int i; - int got_display = 
0; static unsigned char default_colors[] = { 0x00, 0x00, 0x00, @@ -1546,8 +1489,6 @@ static void __init prom_check_displays(void) clut[2]) != 0) break; #endif /* CONFIG_LOGO_LINUX_CLUT224 */ - if (!got_display) - got_display = setup_disp(node); } } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index bfa155c00ea..e68f848f24b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -294,8 +294,6 @@ unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); - reloc_got2(offset); - /* * Identify the CPU type and fix up code sections * that depend on which cpu we have. @@ -303,12 +301,6 @@ unsigned long __init early_init(unsigned long dt_ptr) identify_cpu(offset, 0); do_cpu_ftr_fixups(offset); -#ifdef CONFIG_BOOTX_TEXT - btext_prepare_BAT(); -#endif - - reloc_got2(-offset); - return KERNELBASE + offset; } @@ -578,13 +570,13 @@ void __init setup_arch(char **cmdline_p) /* so udelay does something sensible, assume <= 1000 bogomips */ loops_per_jiffy = 500000000 / HZ; -#ifdef CONFIG_BOOTX_TEXT - map_boot_text(); -#endif - unflatten_device_tree(); finish_device_tree(); +#ifdef CONFIG_BOOTX_TEXT + init_boot_display(); +#endif + #ifdef CONFIG_PPC_MULTIPLATFORM /* This could be called "early setup arch", it must be done * now because xmon need it diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index bf13c14e66b..caeb02ee72c 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -188,13 +188,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:exit", 0x211); - -#ifdef CONFIG_BOOTX_TEXT - /* By default, we are no longer mapped */ - boot_text_mapped = 0; - /* Must be done last, or ppc_md.progress will die. */ - map_boot_text(); -#endif } /* This is only called until mem_init is done. */ -- cgit v1.2.3-70-g09d2 From b15125fa81a522882247242e569bc7035e28cc82 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 Oct 2005 17:42:09 -0500 Subject: [PATCH] powerpc: Some more fixes to allow building for a Book-E processor Some minor fixes that are needed if we are building for a book-e processor. Signed-off-by: Kumar K. Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 14b2ffb6c2d..93441e7a292 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,14 +7,13 @@ EXTRA_CFLAGS += -mno-minimal-toc endif obj-y := fault.o mem.o lmb.o -obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ - tlb_32.o +obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o hash-$(CONFIG_PPC_MULTIPLATFORM) := hash_native_64.o obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \ hash_utils_64.o hash_low_64.o tlb_64.o \ slb_low.o slb.o stab.o mmap.o imalloc.o \ $(hash-y) -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o -- cgit v1.2.3-70-g09d2 From 5c8c56ebdfb290e4feaac406518903f58714d874 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 22 Oct 2005 14:42:51 +1000 Subject: powerpc: Move agp_special_page export to where it is defined ... instead of exporting it in arch/*/kernel/ppc_ksyms.c. 
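The pattern generalizes to any exported variable: placing EXPORT_SYMBOL() immediately after the definition removes the duplicate extern declaration from ppc_ksyms.c and keeps the export in step with the definition if it ever moves or gains an #ifdef. A minimal sketch with an invented symbol name (the defining file needs linux/module.h for the macro):

    /* Old style: definition in one file ... */
    unsigned long example_special_page;

    /* ... and a detached export over in ppc_ksyms.c */
    extern unsigned long example_special_page;
    EXPORT_SYMBOL(example_special_page);

    /* New style: definition and export side by side in the defining file */
    unsigned long example_special_page;
    EXPORT_SYMBOL(example_special_page);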
Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- arch/powerpc/mm/init_32.c | 1 + arch/ppc/kernel/ppc_ksyms.c | 4 ---- arch/ppc/mm/init.c | 1 + 4 files changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 40c9e67e1b2..254bf9c0b5b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -274,10 +274,6 @@ EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ extern long *intercept_table; EXPORT_SYMBOL(intercept_table); #endif /* CONFIG_PPC_STD_MMU_32 */ -#ifdef CONFIG_PPC_PMAC -extern unsigned long agp_special_page; -EXPORT_SYMBOL(agp_special_page); -#endif #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) EXPORT_SYMBOL(__mtdcr); EXPORT_SYMBOL(__mfdcr); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index caeb02ee72c..aa6a5440cec 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -69,6 +69,7 @@ unsigned long ppc_memoffset = PAGE_OFFSET; int boot_mapsize; #ifdef CONFIG_PPC_PMAC unsigned long agp_special_page; +EXPORT_SYMBOL(agp_special_page); #endif #ifdef CONFIG_HIGHMEM diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index f4373fb9181..dcc83440f20 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -323,10 +323,6 @@ extern long *intercept_table; EXPORT_SYMBOL(intercept_table); #endif /* CONFIG_PPC_STD_MMU */ EXPORT_SYMBOL(cur_cpu_spec); -#ifdef CONFIG_PPC_PMAC -extern unsigned long agp_special_page; -EXPORT_SYMBOL(agp_special_page); -#endif #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) EXPORT_SYMBOL(__mtdcr); EXPORT_SYMBOL(__mfdcr); diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index 5e9ef23b467..db94efd2536 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -69,6 +69,7 @@ int init_bootmem_done; int boot_mapsize; #ifdef CONFIG_PPC_PMAC unsigned long agp_special_page; +EXPORT_SYMBOL(agp_special_page); #endif extern char _end[]; -- cgit v1.2.3-70-g09d2 From fa39dc437a41733adaba241fd9036760283a516a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 26 Oct 2005 21:54:21 +1000 Subject: powerpc32: Limit memory to lowmem if !CONFIG_HIGHMEM. This trims off the extra unusable memory from the lmb structure, so we don't try to use it. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/init_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index aa6a5440cec..8dd1f7d0e23 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -154,10 +154,13 @@ void __init MMU_init(void) * in the fixed entries */ adjust_total_lowmem(); #endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { total_lowmem = __max_low_memory; #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; + lmb_enforce_memory_limit(total_lowmem); + lmb_analyze(); #endif /* CONFIG_HIGHMEM */ } -- cgit v1.2.3-70-g09d2 From e37bc5df8e96c72f27ec3579499726b656e4e641 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 24 Oct 2005 11:41:33 +1000 Subject: [PATCH] powerpc: Purge bootinfo.h With ARCH=powerpc we assume the presence of a device tree, so we don't require any support for the old bi_recs method of passing boot parameters. Likewise, we've never needed it for ppc64, but we still had an include/asm-ppc64/bootinfo.h from which nothing was used. This patch removes that file, and all references to it in arch/ppc64 and arch/powerpc. 
A related, unused variable 'boot_mem_size' is also removed from setup_32.c. The bootinfo stuff remains in ARCH=ppc for the time being. Built and booted on Power5 (ARCH=ppc64 and ARCH=powerpc), built for 32-bit powermac (ARCH=powerpc and ARCH=ppc). Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom_init.c | 1 - arch/powerpc/kernel/setup_32.c | 5 ----- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/mem.c | 1 - arch/ppc64/kernel/prom.c | 1 - arch/ppc64/kernel/prom_init.c | 1 - 7 files changed, 11 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 2ad9947bd67..9750b3cd8ec 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -41,7 +41,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 0cac112ca89..21ef643a020 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -24,7 +24,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include @@ -60,10 +59,6 @@ struct ide_machdep_calls ppc_ide_md; int __irq_offset_value; EXPORT_SYMBOL(__irq_offset_value); -/* Used with the BI_MEMSIZE bootinfo parameter to store the memory size value reported by the boot loader. */ -unsigned long boot_mem_size; - unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 721adeea601..a8f7ff5ab1a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -37,7 +37,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 8dd1f7d0e23..4612a79dfb6 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -43,7 +43,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5e9206715f0..695db6a588c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -43,7 +43,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index a0866f12647..cd41a47dd43 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -46,7 +46,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index f252670874a..69924ba4d7d 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -44,7 +44,6 @@ #include #include #include -#include <asm/bootinfo.h> #include #include #include -- cgit v1.2.3-70-g09d2 From cffb09ce6ba7706c89c6df9ca8e72c81adda13f0 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 26 Oct 2005 09:55:41 -0500 Subject: [PATCH] powerpc: Fix warning related to do_dabr do_dabr() is not relevant on 40x or Book-E processors so don't build it Signed-off-by: Kumar K.
Gala Signed-off-by: Paul Mackerras --- arch/powerpc/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 3df641fa789..841d8b6323a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -80,6 +80,7 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) static void do_dabr(struct pt_regs *regs, unsigned long error_code) { siginfo_t info; @@ -101,6 +102,7 @@ static void do_dabr(struct pt_regs *regs, unsigned long error_code) info.si_addr = (void __user *)regs->nip; force_sig_info(SIGTRAP, &info, current); } +#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ /* * For 600- and 800-family processors, the error_code parameter is DSISR -- cgit v1.2.3-70-g09d2 From 8b150478aeb1a8edb9015c2f7ac4da637ff65c45 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 17:46:18 -0700 Subject: [PATCH] ppc: make phys_mem_access_prot() work with pfns instead of addresses Change the phys_mem_access_prot() function to take a pfn instead of an address. This allows mmap64() to work on /dev/mem for addresses above 4G on 32-bit architectures. We start with a pfn in mmap_mem(), so there's no need to convert to an address; in fact, it's actively bad, since the conversion can overflow when the address is above 4G. Similarly fix the ppc32 page_is_ram() function to avoid a conversion to an address by directly comparing to max_pfn. Working with max_pfn instead of high_memory fixes page_is_ram() to give the right answer for highmem pages. Signed-off-by: Roland Dreier Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Paul Mackerras --- arch/powerpc/mm/mem.c | 6 +++--- arch/ppc/kernel/pci.c | 5 +++-- arch/ppc/mm/init.c | 10 ++++------ arch/ppc64/kernel/pci.c | 5 +++-- drivers/char/mem.c | 4 +--- drivers/video/fbmem.c | 2 +- include/asm-powerpc/machdep.h | 2 +- include/asm-ppc/machdep.h | 2 +- include/asm-ppc/pci.h | 2 +- include/asm-ppc/pgtable.h | 2 +- include/asm-ppc64/pci.h | 2 +- include/asm-ppc64/pgtable.h | 2 +- 12 files changed, 21 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 695db6a588c..3ca331728d2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -88,13 +88,13 @@ int page_is_ram(unsigned long pfn) } EXPORT_SYMBOL(page_is_ram); -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); - if (!page_is_ram(addr >> PAGE_SHIFT)) + if (!page_is_ram(pfn)) vma_prot = __pgprot(pgprot_val(vma_prot) | _PAGE_GUARDED | _PAGE_NO_CACHE); return vma_prot; diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index ad4ef2aaa6a..e8f4e576750 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -1594,16 +1594,17 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, * above routine */ pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t protection) { struct pci_dev *pdev = NULL; struct resource *found = NULL; unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; int i; - if 
(page_is_ram(offset >> PAGE_SHIFT)) + if (page_is_ram(pfn)) return prot; prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index db94efd2536..99b48abd329 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -637,18 +637,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, */ int page_is_ram(unsigned long pfn) { - unsigned long paddr = (pfn << PAGE_SHIFT); - - return paddr < __pa(high_memory); + return pfn < max_pfn; } -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); - if (!page_is_ram(addr >> PAGE_SHIFT)) + if (!page_is_ram(pfn)) vma_prot = __pgprot(pgprot_val(vma_prot) | _PAGE_GUARDED | _PAGE_NO_CACHE); return vma_prot; diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index b2fb6746f00..3d2106b022a 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -726,16 +726,17 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp, * above routine */ pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t protection) { struct pci_dev *pdev = NULL; struct resource *found = NULL; unsigned long prot = pgprot_val(protection); + unsigned long offset = pfn << PAGE_SHIFT; int i; - if (page_is_ram(offset >> PAGE_SHIFT)) + if (page_is_ram(pfn)) return __pgprot(prot); prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index f182752fe91..9df928d4f68 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -231,9 +231,7 @@ static ssize_t write_mem(struct file * file, const char __user * buf, static int mmap_mem(struct file * file, struct vm_area_struct * vma) { #if defined(__HAVE_PHYS_MEM_ACCESS_PROT) - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - vma->vm_page_prot = phys_mem_access_prot(file, offset, + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot); #elif defined(pgprot_noncached) diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 70be7009f8a..ca02aa2bfce 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -918,7 +918,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) } #endif #elif defined(__powerpc__) - vma->vm_page_prot = phys_mem_access_prot(file, off, + vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); #elif defined(__alpha__) diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 706508b1b04..451b345cfc7 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -144,7 +144,7 @@ struct machdep_calls { /* Get access protection for /dev/mem */ pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h index 6c6d23abbe9..f01255bd1dc 100644 --- a/include/asm-ppc/machdep.h +++ b/include/asm-ppc/machdep.h @@ -98,7 +98,7 @@ struct machdep_calls { /* Get access protection for /dev/mem */ pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t 
vma_prot); diff --git a/include/asm-ppc/pci.h b/include/asm-ppc/pci.h index 643740dd727..61434edbad7 100644 --- a/include/asm-ppc/pci.h +++ b/include/asm-ppc/pci.h @@ -126,7 +126,7 @@ extern void pcibios_add_platform_entries(struct pci_dev *dev); struct file; extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t prot); diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index eee601bb9ad..b28a713ba86 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -705,7 +705,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty) #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED)) struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT diff --git a/include/asm-ppc64/pci.h b/include/asm-ppc64/pci.h index a88bbfc2696..342e2d75555 100644 --- a/include/asm-ppc64/pci.h +++ b/include/asm-ppc64/pci.h @@ -168,7 +168,7 @@ extern void pcibios_add_platform_entries(struct pci_dev *dev); struct file; extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long offset, + unsigned long pfn, unsigned long size, pgprot_t prot); diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index c83679c9d2b..8cf5991540e 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -471,7 +471,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty) #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED)) struct file; -extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT -- cgit v1.2.3-70-g09d2 From 3ee1fcac33eae824422b9b98d972a85e79672426 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 29 Oct 2005 22:10:38 +1000 Subject: powerpc: import a gfp_t fix to arch/powerpc/mm/pgtable_32.c This applies the same fix as Al Viro recently made to arch/ppc/mm/pgtable.c. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/pgtable_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5792e533916..f54fb9d3927 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -114,9 +114,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + gfp_t flags = GFP_KERNEL | __GFP_REPEAT; #endif ptepage = alloc_pages(flags, 0); -- cgit v1.2.3-70-g09d2 From cf00a8d18b9a1c2d55b2728e89125c234e821db5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 31 Oct 2005 13:07:02 +1100 Subject: powerpc: Fix bug arising from having multiple memory_limit variables We had a static memory_limit in prom.c, and then another one defined in setup_64.c and used in numa.c, which resulted in the kernel crashing when mem=xxx was given on the command line. This puts the declaration in system.h and the definition in mem.c. 
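The underlying C pitfall is worth spelling out: a file-scope static is a private copy per translation unit, so the mem= parser was writing one memory_limit while numa.c consulted another that stayed zero. Schematically, with the file names as annotations only (this is not compilable as a single file):

    /* broken: two independent objects with the same name */
    /* prom.c     */  static unsigned long memory_limit;  /* parser writes this   */
    /* setup_64.c */  unsigned long memory_limit;         /* numa.c reads this one */

    /* fixed: one definition, one shared declaration */
    /* system.h   */  extern unsigned long memory_limit;
    /* mem.c      */  unsigned long memory_limit;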
This also moves the definition of tce_alloc_start/end out of setup_64.c. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/prom.c | 4 +--- arch/powerpc/kernel/setup_64.c | 9 --------- arch/powerpc/mm/mem.c | 1 + arch/powerpc/mm/numa.c | 2 +- arch/powerpc/platforms/iseries/setup.c | 2 -- arch/ppc64/kernel/prom.c | 3 ++- include/asm-powerpc/system.h | 1 + include/asm-ppc64/system.h | 1 + 8 files changed, 7 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6309a1a17c4..2eccd0e159e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -78,15 +78,13 @@ extern struct rtas_t rtas; extern struct lmb lmb; extern unsigned long klimit; -static unsigned long memory_limit; - static int __initdata dt_root_addr_cells; static int __initdata dt_root_size_cells; #ifdef CONFIG_PPC64 static int __initdata iommu_is_off; int __initdata iommu_force_on; -extern unsigned long tce_alloc_start, tce_alloc_end; +unsigned long tce_alloc_start, tce_alloc_end; #endif typedef u32 cell_t; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 681537f8ea1..40c48100bf1 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -631,15 +631,6 @@ static int ppc64_panic_event(struct notifier_block *this, return NOTIFY_DONE; } -/* - * These three variables are used to save values passed to us by prom_init() - * via the device tree. The TCE variables are needed because with a memory_limit - * in force we may need to explicitly map the TCE are at the top of RAM. - */ -unsigned long memory_limit; -unsigned long tce_alloc_start; -unsigned long tce_alloc_end; - #ifdef CONFIG_PPC_ISERIES /* * On iSeries we just parse the mem=X option from the command line. diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3ca331728d2..e43e8ef7008 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -59,6 +59,7 @@ int init_bootmem_done; int mem_init_done; +unsigned long memory_limit; /* * This is called by /dev/mem to know if a given address has to diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index cb864b8f275..4035cad8d7f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -20,6 +20,7 @@ #include #include #include +#include static int numa_enabled = 1; @@ -300,7 +301,6 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsig * we've already adjusted it for the limit and it takes care of * having memory holes below the limit. */ - extern unsigned long memory_limit; if (! 
memory_limit) return size; diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index b2790148178..1544c6f10a3 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -302,8 +302,6 @@ static void __init iSeries_get_cmdline(void) static void __init iSeries_init_early(void) { - extern unsigned long memory_limit; - DBG(" -> iSeries_init_early()\n"); ppc64_firmware_features = FW_FEATURE_ISERIES; diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index cd41a47dd43..97bfceb5353 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -83,6 +83,8 @@ static int __initdata dt_root_addr_cells; static int __initdata dt_root_size_cells; static int __initdata iommu_is_off; int __initdata iommu_force_on; +unsigned long tce_alloc_start, tce_alloc_end; + typedef u32 cell_t; #if 0 @@ -1063,7 +1065,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u32 *prop; u64 *prop64; - extern unsigned long tce_alloc_start, tce_alloc_end; DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index e926e43c4ae..5b2ecbc4790 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -179,6 +179,7 @@ extern struct task_struct *_switch(struct thread_struct *prev, extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ +extern unsigned long memory_limit; extern int powersave_nap; /* set if nap mode can be used in idle loop */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index fd7c1f890c4..99b8ca52f10 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -136,6 +136,7 @@ static inline void flush_spe_to_thread(struct task_struct *t) } extern int mem_init_done; /* set on boot once kmalloc can be called */ +extern unsigned long memory_limit; /* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */ extern unsigned char e2a(unsigned char); -- cgit v1.2.3-70-g09d2 From 734d6524800b6a8362666e893a5f3f29b9ef0be9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 31 Oct 2005 13:57:01 +1100 Subject: powerpc: apply recent changes to merged code Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/of_device.c | 2 ++ arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/ptrace32.c | 4 ++-- arch/powerpc/kernel/time.c | 4 ---- arch/powerpc/lib/locks.c | 1 + arch/powerpc/mm/imalloc.c | 5 ----- 6 files changed, 6 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index 76671881451..7065e40e2f4 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -4,6 +4,8 @@ #include #include #include +#include + #include #include diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 943425a9335..568ea335d61 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -248,7 +248,7 @@ void ptrace_disable(struct task_struct *child) clear_single_step(child); } -int sys_ptrace(long request, long pid, long addr, long data) +long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; int ret = -EPERM; diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 544368277d7..91eb952e029 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -40,8 +40,8 @@ * in exit.c or in signal.c. 
*/ -int compat_sys_ptrace(int request, int pid, unsigned long addr, - unsigned long data) +long compat_sys_ptrace(int request, int pid, unsigned long addr, + unsigned long data) { struct task_struct *child; int ret = -EPERM; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ed5c38fb146..23436b6c188 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -70,10 +70,6 @@ #include #endif -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* keep track of when we need to update the rtc */ time_t last_rtc_update; extern int piranha_simulator; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 4b8c5ad5e7d..3794715b297 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -17,6 +17,7 @@ #include #include #include +#include /* waiting for a spinlock... */ #if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES) diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c index c65b87b9275..f4ca29cf536 100644 --- a/arch/powerpc/mm/imalloc.c +++ b/arch/powerpc/mm/imalloc.c @@ -300,12 +300,7 @@ void im_free(void * addr) for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; - - /* XXX: do we need the lock? */ - spin_lock(&init_mm.page_table_lock); unmap_vm_area(tmp); - spin_unlock(&init_mm.page_table_lock); - kfree(tmp); up(&imlist_sem); return; -- cgit v1.2.3-70-g09d2 From e2f2e58e7968f8446b1078a20a18bf8ea12b4fbc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 31 Oct 2005 14:40:03 +1100 Subject: powerpc: import a fix from arch/ppc/mm/pgtable.c ... namely, the change to the 2-argument pte_alloc_kernel. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/pgtable_32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f54fb9d3927..f4e5ac12261 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -279,18 +279,16 @@ map_page(unsigned long va, phys_addr_t pa, int flags) pte_t *pg; int err = -ENOMEM; - spin_lock(&init_mm.page_table_lock); /* Use upper 10 bits of VA to index the first level map */ pd = pmd_offset(pgd_offset_k(va), va); /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(&init_mm, pd, va); + pg = pte_alloc_kernel(pd, va); if (pg != 0) { err = 0; set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); if (mem_init_done) flush_HPTE(0, va, pmd_val(*pd)); } - spin_unlock(&init_mm.page_table_lock); return err; } -- cgit v1.2.3-70-g09d2
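For reference, the calling convention this converges on: pte_alloc_kernel() now takes just the pmd pointer and the address and handles its own locking, so callers stop taking init_mm.page_table_lock themselves. A condensed sketch of the resulting caller shape, simplified from the map_page() shown above (the flush_HPTE() call and mem_init_done check are omitted):

    static int example_map_page(unsigned long va, phys_addr_t pa, int flags)
    {
            pmd_t *pd;
            pte_t *pg;

            /* no spin_lock(&init_mm.page_table_lock) around this any more */
            pd = pmd_offset(pgd_offset_k(va), va);
            pg = pte_alloc_kernel(pd, va);          /* two-argument form */
            if (pg == NULL)
                    return -ENOMEM;

            set_pte_at(&init_mm, va, pg,
                       pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
            return 0;
    }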