From 70d64ceaa1a84d2502405422a4dfd3f87786a347 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 10 Oct 2005 21:52:43 +1000 Subject: powerpc: Rename files to have consistent _32/_64 suffixes This doesn't change any code, just renames things so we consistently have foo_32.c and foo_64.c where we have separate 32- and 64-bit versions. Signed-off-by: Paul Mackerras --- arch/powerpc/mm/Makefile | 8 +- arch/powerpc/mm/hash_32.S | 618 --------------------------------------- arch/powerpc/mm/hash_low_32.S | 618 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/mm/init.c | 258 ---------------- arch/powerpc/mm/init64.c | 259 ---------------- arch/powerpc/mm/init_32.c | 258 ++++++++++++++++ arch/powerpc/mm/init_64.c | 259 ++++++++++++++++ arch/powerpc/mm/mem64.c | 259 ---------------- arch/powerpc/mm/mmu_context.c | 86 ------ arch/powerpc/mm/mmu_context64.c | 63 ---- arch/powerpc/mm/mmu_context_32.c | 86 ++++++ arch/powerpc/mm/mmu_context_64.c | 63 ++++ arch/powerpc/mm/pgtable.c | 469 ----------------------------- arch/powerpc/mm/pgtable64.c | 357 ---------------------- arch/powerpc/mm/pgtable_32.c | 469 +++++++++++++++++++++++++++++ arch/powerpc/mm/pgtable_64.c | 357 ++++++++++++++++++++++ arch/powerpc/mm/ppc_mmu.c | 285 ------------------ arch/powerpc/mm/ppc_mmu_32.c | 285 ++++++++++++++++++ arch/powerpc/mm/tlb.c | 183 ------------ arch/powerpc/mm/tlb_32.c | 183 ++++++++++++ 20 files changed, 2582 insertions(+), 2841 deletions(-) delete mode 100644 arch/powerpc/mm/hash_32.S create mode 100644 arch/powerpc/mm/hash_low_32.S delete mode 100644 arch/powerpc/mm/init.c delete mode 100644 arch/powerpc/mm/init64.c create mode 100644 arch/powerpc/mm/init_32.c create mode 100644 arch/powerpc/mm/init_64.c delete mode 100644 arch/powerpc/mm/mem64.c delete mode 100644 arch/powerpc/mm/mmu_context.c delete mode 100644 arch/powerpc/mm/mmu_context64.c create mode 100644 arch/powerpc/mm/mmu_context_32.c create mode 100644 arch/powerpc/mm/mmu_context_64.c delete mode 100644 arch/powerpc/mm/pgtable.c delete mode 100644 arch/powerpc/mm/pgtable64.c create mode 100644 arch/powerpc/mm/pgtable_32.c create mode 100644 arch/powerpc/mm/pgtable_64.c delete mode 100644 arch/powerpc/mm/ppc_mmu.c create mode 100644 arch/powerpc/mm/ppc_mmu_32.c delete mode 100644 arch/powerpc/mm/tlb.c create mode 100644 arch/powerpc/mm/tlb_32.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index afd3be112b7..35497deeb4b 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,10 +3,10 @@ # obj-y := fault.o mem.o lmb.o -obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ - tlb.o -obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o -obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_PPC32) += init_32.o pgtable_32.o mmu_context_32.o \ + tlb_32.o +obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S deleted file mode 100644 index 57278a8dd13..00000000000 --- a/arch/powerpc/mm/hash_32.S +++ /dev/null @@ -1,618 +0,0 @@ -/* - * arch/ppc/kernel/hashtable.S - * - * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * - * This file contains low-level assembler routines for managing - * the PowerPC MMU hash table. (PPC 8xx processors don't use a - * hash table, so this file is not used on them.) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP - .comm mmu_hash_lock,4 -#endif /* CONFIG_SMP */ - -/* - * Sync CPUs with hash_page taking & releasing the hash - * table lock - */ -#ifdef CONFIG_SMP - .text -_GLOBAL(hash_page_sync) - lis r8,mmu_hash_lock@h - ori r8,r8,mmu_hash_lock@l - lis r0,0x0fff - b 10f -11: lwz r6,0(r8) - cmpwi 0,r6,0 - bne 11b -10: lwarx r6,0,r8 - cmpwi 0,r6,0 - bne- 11b - stwcx. r0,0,r8 - bne- 10b - isync - eieio - li r0,0 - stw r0,0(r8) - blr -#endif - -/* - * Load a PTE into the hash table, if possible. - * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x400) if a write. - * r9 contains the SRR1 value, from which we use the MSR_PR bit. - * SPRG3 contains the physical address of the current task's thread. - * - * Returns to the caller if the access is illegal or there is no - * mapping for the address. Otherwise it places an appropriate PTE - * in the hash table and returns from the exception. - * Uses r0, r3 - r8, ctr, lr. - */ - .text -_GLOBAL(hash_page) -#ifdef CONFIG_PPC64BRIDGE - mfmsr r0 - clrldi r0,r0,1 /* make sure it's in 32-bit mode */ - MTMSRD(r0) - isync -#endif - tophys(r7,0) /* gets -KERNELBASE into r7 */ -#ifdef CONFIG_SMP - addis r8,r7,mmu_hash_lock@h - ori r8,r8,mmu_hash_lock@l - lis r0,0x0fff - b 10f -11: lwz r6,0(r8) - cmpwi 0,r6,0 - bne 11b -10: lwarx r6,0,r8 - cmpwi 0,r6,0 - bne- 11b - stwcx. r0,0,r8 - bne- 10b - isync -#endif - /* Get PTE (linux-style) and check access */ - lis r0,KERNELBASE@h /* check if kernel address */ - cmplw 0,r4,r0 - mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ - ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - lwz r5,PGDIR(r8) /* virt page-table root */ - blt+ 112f /* assume user more likely */ - lis r5,swapper_pg_dir@ha /* if kernel address, use */ - addi r5,r5,swapper_pg_dir@l /* kernel page table */ - rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: add r5,r5,r7 /* convert to phys addr */ - rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ - lwz r8,0(r5) /* get pmd entry */ - rlwinm. r8,r8,0,0,19 /* extract address of pte page */ -#ifdef CONFIG_SMP - beq- hash_page_out /* return if no mapping */ -#else - /* XXX it seems like the 601 will give a machine fault on the - rfi if its alignment is wrong (bottom 4 bits of address are - 8 or 0xc) and we have had a not-taken conditional branch - to the address following the rfi. */ - beqlr- -#endif - rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ - rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ - ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE - - /* - * Update the linux PTE atomically. We do the lwarx up-front - * because almost always, there won't be a permission violation - * and there won't already be an HPTE, and thus we will have - * to update the PTE to set _PAGE_HASHPTE. -- paulus. - */ -retry: - lwarx r6,0,r8 /* get linux-style pte */ - andc. r5,r3,r6 /* check access & ~permission */ -#ifdef CONFIG_SMP - bne- hash_page_out /* return if access not permitted */ -#else - bnelr- -#endif - or r5,r0,r6 /* set accessed/dirty bits */ - stwcx. r5,0,r8 /* attempt to update PTE */ - bne- retry /* retry if someone got there first */ - - mfsrin r3,r4 /* get segment reg for segment */ - mfctr r0 - stw r0,_CTR(r11) - bl create_hpte /* add the hash table entry */ - -#ifdef CONFIG_SMP - eieio - addis r8,r7,mmu_hash_lock@ha - li r0,0 - stw r0,mmu_hash_lock@l(r8) -#endif - - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r7,GPR7(r11) - lwz r8,GPR8(r11) - b fast_exception_return - -#ifdef CONFIG_SMP -hash_page_out: - eieio - addis r8,r7,mmu_hash_lock@ha - li r0,0 - stw r0,mmu_hash_lock@l(r8) - blr -#endif /* CONFIG_SMP */ - -/* - * Add an entry for a particular page to the hash table. - * - * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) - * - * We assume any necessary modifications to the pte (e.g. setting - * the accessed bit) have already been done and that there is actually - * a hash table in use (i.e. we're not on a 603). - */ -_GLOBAL(add_hash_page) - mflr r0 - stw r0,4(r1) - - /* Convert context and va to VSID */ - mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note create_hpte trims to 24 bits */ - -#ifdef CONFIG_SMP - rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ - lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ - oris r8,r8,12 -#endif /* CONFIG_SMP */ - - /* - * We disable interrupts here, even on UP, because we don't - * want to race with hash_page, and because we want the - * _PAGE_HASHPTE bit to be a reliable indication of whether - * the HPTE exists (or at least whether one did once). - * We also turn off the MMU for data accesses so that we - * we can't take a hash table miss (assuming the code is - * covered by a BAT). -- paulus - */ - mfmsr r10 - SYNC - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear MSR_DR */ - mtmsr r0 - SYNC_601 - isync - - tophys(r7,0) - -#ifdef CONFIG_SMP - addis r9,r7,mmu_hash_lock@ha - addi r9,r9,mmu_hash_lock@l -10: lwarx r0,0,r9 /* take the mmu_hash_lock */ - cmpi 0,r0,0 - bne- 11f - stwcx. r8,0,r9 - beq+ 12f -11: lwz r0,0(r9) - cmpi 0,r0,0 - beq 10b - b 11b -12: isync -#endif - - /* - * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. - * If _PAGE_HASHPTE was already set, we don't replace the existing - * HPTE, so we just unlock and return. - */ - mr r8,r5 - rlwimi r8,r4,22,20,29 -1: lwarx r6,0,r8 - andi. r0,r6,_PAGE_HASHPTE - bne 9f /* if HASHPTE already set, done */ - ori r5,r6,_PAGE_HASHPTE - stwcx. r5,0,r8 - bne- 1b - - bl create_hpte - -9: -#ifdef CONFIG_SMP - eieio - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ -#endif - - /* reenable interrupts and DR */ - mtmsr r10 - SYNC_601 - isync - - lwz r0,4(r1) - mtlr r0 - blr - -/* - * This routine adds a hardware PTE to the hash table. - * It is designed to be called with the MMU either on or off. - * r3 contains the VSID, r4 contains the virtual address, - * r5 contains the linux PTE, r6 contains the old value of the - * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the - * offset to be added to addresses (0 if the MMU is on, - * -KERNELBASE if it is off). - * On SMP, the caller should have the mmu_hash_lock held. - * We assume that the caller has (or will) set the _PAGE_HASHPTE - * bit in the linux PTE in memory. The value passed in r6 should - * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set - * this routine will skip the search for an existing HPTE. - * This procedure modifies r0, r3 - r6, r8, cr0. - * -- paulus. - * - * For speed, 4 of the instructions get patched once the size and - * physical address of the hash table are known. These definitions - * of Hash_base and Hash_bits below are just an example. - */ -Hash_base = 0xc0180000 -Hash_bits = 12 /* e.g. 256kB hash table */ -Hash_msk = (((1 << Hash_bits) - 1) * 64) - -#ifndef CONFIG_PPC64BRIDGE -/* defines for the PTE format for 32-bit PPCs */ -#define PTE_SIZE 8 -#define PTEG_SIZE 64 -#define LG_PTEG_SIZE 6 -#define LDPTEu lwzu -#define STPTE stw -#define CMPPTE cmpw -#define PTE_H 0x40 -#define PTE_V 0x80000000 -#define TST_V(r) rlwinm. r,r,0,0,0 -#define SET_V(r) oris r,r,PTE_V@h -#define CLR_V(r,t) rlwinm r,r,0,1,31 - -#else -/* defines for the PTE format for 64-bit PPCs */ -#define PTE_SIZE 16 -#define PTEG_SIZE 128 -#define LG_PTEG_SIZE 7 -#define LDPTEu ldu -#define STPTE std -#define CMPPTE cmpd -#define PTE_H 2 -#define PTE_V 1 -#define TST_V(r) andi. r,r,PTE_V -#define SET_V(r) ori r,r,PTE_V -#define CLR_V(r,t) li t,PTE_V; andc r,r,t -#endif /* CONFIG_PPC64BRIDGE */ - -#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) -#define HASH_RIGHT 31-LG_PTEG_SIZE - -_GLOBAL(create_hpte) - /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ - ori r8,r8,0xe14 /* clear out reserved bits and M */ - andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ -BEGIN_FTR_SECTION - ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ -END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) - - /* Construct the high word of the PPC-style PTE (r5) */ -#ifndef CONFIG_PPC64BRIDGE - rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ - rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r5,r3,12 /* shift vsid into position */ - rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ - SET_V(r5) /* set V (valid) bit */ - - /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(hash_page_patch_A) - addis r0,r7,Hash_base@h /* base address of hash table */ - rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ - xor r3,r3,r0 /* make primary hash */ - li r0,8 /* PTEs/group */ - - /* - * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search - * if it is clear, meaning that the HPTE isn't there already... - */ - andi. r6,r6,_PAGE_HASHPTE - beq+ 10f /* no PTE: go look for an empty slot */ - tlbie r4 - - addis r4,r7,htab_hash_searches@ha - lwz r6,htab_hash_searches@l(r4) - addi r6,r6,1 /* count how many searches we do */ - stw r6,htab_hash_searches@l(r4) - - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ - mtctr r0 - addi r4,r3,-PTE_SIZE -1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ - CMPPTE 0,r6,r5 - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_slot - - /* Search the secondary PTEG for a matching PTE */ - ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_B) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ - xori r4,r4,(-PTEG_SIZE & 0xffff) - addi r4,r4,-PTE_SIZE - mtctr r0 -2: LDPTEu r6,PTE_SIZE(r4) - CMPPTE 0,r6,r5 - bdnzf 2,2b - beq+ found_slot - xori r5,r5,PTE_H /* clear H bit again */ - - /* Search the primary PTEG for an empty slot */ -10: mtctr r0 - addi r4,r3,-PTE_SIZE /* search primary PTEG */ -1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ - TST_V(r6) /* test valid bit */ - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ found_empty - - /* update counter of times that the primary PTEG is full */ - addis r4,r7,primary_pteg_full@ha - lwz r6,primary_pteg_full@l(r4) - addi r6,r6,1 - stw r6,primary_pteg_full@l(r4) - - /* Search the secondary PTEG for an empty slot */ - ori r5,r5,PTE_H /* set H (secondary hash) bit */ -_GLOBAL(hash_page_patch_C) - xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ - xori r4,r4,(-PTEG_SIZE & 0xffff) - addi r4,r4,-PTE_SIZE - mtctr r0 -2: LDPTEu r6,PTE_SIZE(r4) - TST_V(r6) - bdnzf 2,2b - beq+ found_empty - xori r5,r5,PTE_H /* clear H bit again */ - - /* - * Choose an arbitrary slot in the primary PTEG to overwrite. - * Since both the primary and secondary PTEGs are full, and we - * have no information that the PTEs in the primary PTEG are - * more important or useful than those in the secondary PTEG, - * and we know there is a definite (although small) speed - * advantage to putting the PTE in the primary PTEG, we always - * put the PTE in the primary PTEG. - */ - addis r4,r7,next_slot@ha - lwz r6,next_slot@l(r4) - addi r6,r6,PTE_SIZE - andi. r6,r6,7*PTE_SIZE - stw r6,next_slot@l(r4) - add r4,r3,r6 - -#ifndef CONFIG_SMP - /* Store PTE in PTEG */ -found_empty: - STPTE r5,0(r4) -found_slot: - STPTE r8,PTE_SIZE/2(r4) - -#else /* CONFIG_SMP */ -/* - * Between the tlbie above and updating the hash table entry below, - * another CPU could read the hash table entry and put it in its TLB. - * There are 3 cases: - * 1. using an empty slot - * 2. updating an earlier entry to change permissions (i.e. enable write) - * 3. taking over the PTE for an unrelated address - * - * In each case it doesn't really matter if the other CPUs have the old - * PTE in their TLB. So we don't need to bother with another tlbie here, - * which is convenient as we've overwritten the register that had the - * address. :-) The tlbie above is mainly to make sure that this CPU comes - * and gets the new PTE from the hash table. - * - * We do however have to make sure that the PTE is never in an invalid - * state with the V bit set. - */ -found_empty: -found_slot: - CLR_V(r5,r0) /* clear V (valid) bit in PTE */ - STPTE r5,0(r4) - sync - TLBSYNC - STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ - sync - SET_V(r5) - STPTE r5,0(r4) /* finally set V bit in PTE */ -#endif /* CONFIG_SMP */ - - sync /* make sure pte updates get to memory */ - blr - - .comm next_slot,4 - .comm primary_pteg_full,4 - .comm htab_hash_searches,4 - -/* - * Flush the entry for a particular page from the hash table. - * - * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, - * int count) - * - * We assume that there is a hash table in use (Hash != 0). - */ -_GLOBAL(flush_hash_pages) - tophys(r7,0) - - /* - * We disable interrupts here, even on UP, because we want - * the _PAGE_HASHPTE bit to be a reliable indication of - * whether the HPTE exists (or at least whether one did once). - * We also turn off the MMU for data accesses so that we - * we can't take a hash table miss (assuming the code is - * covered by a BAT). -- paulus - */ - mfmsr r10 - SYNC - rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ - rlwinm r0,r0,0,28,26 /* clear MSR_DR */ - mtmsr r0 - SYNC_601 - isync - - /* First find a PTE in the range that has _PAGE_HASHPTE set */ - rlwimi r5,r4,22,20,29 -1: lwz r0,0(r5) - cmpwi cr1,r6,1 - andi. r0,r0,_PAGE_HASHPTE - bne 2f - ble cr1,19f - addi r4,r4,0x1000 - addi r5,r5,4 - addi r6,r6,-1 - b 1b - - /* Convert context and va to VSID */ -2: mulli r3,r3,897*16 /* multiply context by context skew */ - rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ - mulli r0,r0,0x111 /* multiply by ESID skew */ - add r3,r3,r0 /* note code below trims to 24 bits */ - - /* Construct the high word of the PPC-style PTE (r11) */ -#ifndef CONFIG_PPC64BRIDGE - rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ - rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ -#else /* CONFIG_PPC64BRIDGE */ - clrlwi r3,r3,8 /* reduce vsid to 24 bits */ - sldi r11,r3,12 /* shift vsid into position */ - rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ -#endif /* CONFIG_PPC64BRIDGE */ - SET_V(r11) /* set V (valid) bit */ - -#ifdef CONFIG_SMP - addis r9,r7,mmu_hash_lock@ha - addi r9,r9,mmu_hash_lock@l - rlwinm r8,r1,0,0,18 - add r8,r8,r7 - lwz r8,TI_CPU(r8) - oris r8,r8,9 -10: lwarx r0,0,r9 - cmpi 0,r0,0 - bne- 11f - stwcx. r8,0,r9 - beq+ 12f -11: lwz r0,0(r9) - cmpi 0,r0,0 - beq 10b - b 11b -12: isync -#endif - - /* - * Check the _PAGE_HASHPTE bit in the linux PTE. If it is - * already clear, we're done (for this pte). If not, - * clear it (atomically) and proceed. -- paulus. - */ -33: lwarx r8,0,r5 /* fetch the pte */ - andi. r0,r8,_PAGE_HASHPTE - beq 8f /* done if HASHPTE is already clear */ - rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ - stwcx. r8,0,r5 /* update the pte */ - bne- 33b - - /* Get the address of the primary PTE group in the hash table (r3) */ -_GLOBAL(flush_hash_patch_A) - addis r8,r7,Hash_base@h /* base address of hash table */ - rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ - rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ - xor r8,r0,r8 /* make primary hash */ - - /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ - li r0,8 /* PTEs/group */ - mtctr r0 - addi r12,r8,-PTE_SIZE -1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ - CMPPTE 0,r0,r11 - bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ - beq+ 3f - - /* Search the secondary PTEG for a matching PTE */ - ori r11,r11,PTE_H /* set H (secondary hash) bit */ - li r0,8 /* PTEs/group */ -_GLOBAL(flush_hash_patch_B) - xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ - xori r12,r12,(-PTEG_SIZE & 0xffff) - addi r12,r12,-PTE_SIZE - mtctr r0 -2: LDPTEu r0,PTE_SIZE(r12) - CMPPTE 0,r0,r11 - bdnzf 2,2b - xori r11,r11,PTE_H /* clear H again */ - bne- 4f /* should rarely fail to find it */ - -3: li r0,0 - STPTE r0,0(r12) /* invalidate entry */ -4: sync - tlbie r4 /* in hw tlb too */ - sync - -8: ble cr1,9f /* if all ptes checked */ -81: addi r6,r6,-1 - addi r5,r5,4 /* advance to next pte */ - addi r4,r4,0x1000 - lwz r0,0(r5) /* check next pte */ - cmpwi cr1,r6,1 - andi. r0,r0,_PAGE_HASHPTE - bne 33b - bgt cr1,81b - -9: -#ifdef CONFIG_SMP - TLBSYNC - li r0,0 - stw r0,0(r9) /* clear mmu_hash_lock */ -#endif - -19: mtmsr r10 - SYNC_601 - isync - blr diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S new file mode 100644 index 00000000000..57278a8dd13 --- /dev/null +++ b/arch/powerpc/mm/hash_low_32.S @@ -0,0 +1,618 @@ +/* + * arch/ppc/kernel/hashtable.S + * + * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * This file contains low-level assembler routines for managing + * the PowerPC MMU hash table. (PPC 8xx processors don't use a + * hash table, so this file is not used on them.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + .comm mmu_hash_lock,4 +#endif /* CONFIG_SMP */ + +/* + * Sync CPUs with hash_page taking & releasing the hash + * table lock + */ +#ifdef CONFIG_SMP + .text +_GLOBAL(hash_page_sync) + lis r8,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync + eieio + li r0,0 + stw r0,0(r8) + blr +#endif + +/* + * Load a PTE into the hash table, if possible. + * The address is in r4, and r3 contains an access flag: + * _PAGE_RW (0x400) if a write. + * r9 contains the SRR1 value, from which we use the MSR_PR bit. + * SPRG3 contains the physical address of the current task's thread. + * + * Returns to the caller if the access is illegal or there is no + * mapping for the address. Otherwise it places an appropriate PTE + * in the hash table and returns from the exception. + * Uses r0, r3 - r8, ctr, lr. + */ + .text +_GLOBAL(hash_page) +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + MTMSRD(r0) + isync +#endif + tophys(r7,0) /* gets -KERNELBASE into r7 */ +#ifdef CONFIG_SMP + addis r8,r7,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync +#endif + /* Get PTE (linux-style) and check access */ + lis r0,KERNELBASE@h /* check if kernel address */ + cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ + ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ + lwz r5,PGDIR(r8) /* virt page-table root */ + blt+ 112f /* assume user more likely */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +112: add r5,r5,r7 /* convert to phys addr */ + rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ + lwz r8,0(r5) /* get pmd entry */ + rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#ifdef CONFIG_SMP + beq- hash_page_out /* return if no mapping */ +#else + /* XXX it seems like the 601 will give a machine fault on the + rfi if its alignment is wrong (bottom 4 bits of address are + 8 or 0xc) and we have had a not-taken conditional branch + to the address following the rfi. */ + beqlr- +#endif + rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE + + /* + * Update the linux PTE atomically. We do the lwarx up-front + * because almost always, there won't be a permission violation + * and there won't already be an HPTE, and thus we will have + * to update the PTE to set _PAGE_HASHPTE. -- paulus. + */ +retry: + lwarx r6,0,r8 /* get linux-style pte */ + andc. r5,r3,r6 /* check access & ~permission */ +#ifdef CONFIG_SMP + bne- hash_page_out /* return if access not permitted */ +#else + bnelr- +#endif + or r5,r0,r6 /* set accessed/dirty bits */ + stwcx. r5,0,r8 /* attempt to update PTE */ + bne- retry /* retry if someone got there first */ + + mfsrin r3,r4 /* get segment reg for segment */ + mfctr r0 + stw r0,_CTR(r11) + bl create_hpte /* add the hash table entry */ + +#ifdef CONFIG_SMP + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) +#endif + + /* Return from the exception */ + lwz r5,_CTR(r11) + mtctr r5 + lwz r0,GPR0(r11) + lwz r7,GPR7(r11) + lwz r8,GPR8(r11) + b fast_exception_return + +#ifdef CONFIG_SMP +hash_page_out: + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) + blr +#endif /* CONFIG_SMP */ + +/* + * Add an entry for a particular page to the hash table. + * + * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) + * + * We assume any necessary modifications to the pte (e.g. setting + * the accessed bit) have already been done and that there is actually + * a hash table in use (i.e. we're not on a 603). + */ +_GLOBAL(add_hash_page) + mflr r0 + stw r0,4(r1) + + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + oris r8,r8,12 +#endif /* CONFIG_SMP */ + + /* + * We disable interrupts here, even on UP, because we don't + * want to race with hash_page, and because we want the + * _PAGE_HASHPTE bit to be a reliable indication of whether + * the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + tophys(r7,0) + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l +10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. + * If _PAGE_HASHPTE was already set, we don't replace the existing + * HPTE, so we just unlock and return. + */ + mr r8,r5 + rlwimi r8,r4,22,20,29 +1: lwarx r6,0,r8 + andi. r0,r6,_PAGE_HASHPTE + bne 9f /* if HASHPTE already set, done */ + ori r5,r6,_PAGE_HASHPTE + stwcx. r5,0,r8 + bne- 1b + + bl create_hpte + +9: +#ifdef CONFIG_SMP + eieio + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + + /* reenable interrupts and DR */ + mtmsr r10 + SYNC_601 + isync + + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This routine adds a hardware PTE to the hash table. + * It is designed to be called with the MMU either on or off. + * r3 contains the VSID, r4 contains the virtual address, + * r5 contains the linux PTE, r6 contains the old value of the + * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the + * offset to be added to addresses (0 if the MMU is on, + * -KERNELBASE if it is off). + * On SMP, the caller should have the mmu_hash_lock held. + * We assume that the caller has (or will) set the _PAGE_HASHPTE + * bit in the linux PTE in memory. The value passed in r6 should + * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set + * this routine will skip the search for an existing HPTE. + * This procedure modifies r0, r3 - r6, r8, cr0. + * -- paulus. + * + * For speed, 4 of the instructions get patched once the size and + * physical address of the hash table are known. These definitions + * of Hash_base and Hash_bits below are just an example. + */ +Hash_base = 0xc0180000 +Hash_bits = 12 /* e.g. 256kB hash table */ +Hash_msk = (((1 << Hash_bits) - 1) * 64) + +#ifndef CONFIG_PPC64BRIDGE +/* defines for the PTE format for 32-bit PPCs */ +#define PTE_SIZE 8 +#define PTEG_SIZE 64 +#define LG_PTEG_SIZE 6 +#define LDPTEu lwzu +#define STPTE stw +#define CMPPTE cmpw +#define PTE_H 0x40 +#define PTE_V 0x80000000 +#define TST_V(r) rlwinm. r,r,0,0,0 +#define SET_V(r) oris r,r,PTE_V@h +#define CLR_V(r,t) rlwinm r,r,0,1,31 + +#else +/* defines for the PTE format for 64-bit PPCs */ +#define PTE_SIZE 16 +#define PTEG_SIZE 128 +#define LG_PTEG_SIZE 7 +#define LDPTEu ldu +#define STPTE std +#define CMPPTE cmpd +#define PTE_H 2 +#define PTE_V 1 +#define TST_V(r) andi. r,r,PTE_V +#define SET_V(r) ori r,r,PTE_V +#define CLR_V(r,t) li t,PTE_V; andc r,r,t +#endif /* CONFIG_PPC64BRIDGE */ + +#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) +#define HASH_RIGHT 31-LG_PTEG_SIZE + +_GLOBAL(create_hpte) + /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r8,r8,0xe14 /* clear out reserved bits and M */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + + /* Construct the high word of the PPC-style PTE (r5) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r5,r3,12 /* shift vsid into position */ + rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r5) /* set V (valid) bit */ + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(hash_page_patch_A) + addis r0,r7,Hash_base@h /* base address of hash table */ + rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r3,r3,r0 /* make primary hash */ + li r0,8 /* PTEs/group */ + + /* + * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search + * if it is clear, meaning that the HPTE isn't there already... + */ + andi. r6,r6,_PAGE_HASHPTE + beq+ 10f /* no PTE: go look for an empty slot */ + tlbie r4 + + addis r4,r7,htab_hash_searches@ha + lwz r6,htab_hash_searches@l(r4) + addi r6,r6,1 /* count how many searches we do */ + stw r6,htab_hash_searches@l(r4) + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + mtctr r0 + addi r4,r3,-PTE_SIZE +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + CMPPTE 0,r6,r5 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_slot + + /* Search the secondary PTEG for a matching PTE */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_B) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + CMPPTE 0,r6,r5 + bdnzf 2,2b + beq+ found_slot + xori r5,r5,PTE_H /* clear H bit again */ + + /* Search the primary PTEG for an empty slot */ +10: mtctr r0 + addi r4,r3,-PTE_SIZE /* search primary PTEG */ +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + TST_V(r6) /* test valid bit */ + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_empty + + /* update counter of times that the primary PTEG is full */ + addis r4,r7,primary_pteg_full@ha + lwz r6,primary_pteg_full@l(r4) + addi r6,r6,1 + stw r6,primary_pteg_full@l(r4) + + /* Search the secondary PTEG for an empty slot */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_C) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + TST_V(r6) + bdnzf 2,2b + beq+ found_empty + xori r5,r5,PTE_H /* clear H bit again */ + + /* + * Choose an arbitrary slot in the primary PTEG to overwrite. + * Since both the primary and secondary PTEGs are full, and we + * have no information that the PTEs in the primary PTEG are + * more important or useful than those in the secondary PTEG, + * and we know there is a definite (although small) speed + * advantage to putting the PTE in the primary PTEG, we always + * put the PTE in the primary PTEG. + */ + addis r4,r7,next_slot@ha + lwz r6,next_slot@l(r4) + addi r6,r6,PTE_SIZE + andi. r6,r6,7*PTE_SIZE + stw r6,next_slot@l(r4) + add r4,r3,r6 + +#ifndef CONFIG_SMP + /* Store PTE in PTEG */ +found_empty: + STPTE r5,0(r4) +found_slot: + STPTE r8,PTE_SIZE/2(r4) + +#else /* CONFIG_SMP */ +/* + * Between the tlbie above and updating the hash table entry below, + * another CPU could read the hash table entry and put it in its TLB. + * There are 3 cases: + * 1. using an empty slot + * 2. updating an earlier entry to change permissions (i.e. enable write) + * 3. taking over the PTE for an unrelated address + * + * In each case it doesn't really matter if the other CPUs have the old + * PTE in their TLB. So we don't need to bother with another tlbie here, + * which is convenient as we've overwritten the register that had the + * address. :-) The tlbie above is mainly to make sure that this CPU comes + * and gets the new PTE from the hash table. + * + * We do however have to make sure that the PTE is never in an invalid + * state with the V bit set. + */ +found_empty: +found_slot: + CLR_V(r5,r0) /* clear V (valid) bit in PTE */ + STPTE r5,0(r4) + sync + TLBSYNC + STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ + sync + SET_V(r5) + STPTE r5,0(r4) /* finally set V bit in PTE */ +#endif /* CONFIG_SMP */ + + sync /* make sure pte updates get to memory */ + blr + + .comm next_slot,4 + .comm primary_pteg_full,4 + .comm htab_hash_searches,4 + +/* + * Flush the entry for a particular page from the hash table. + * + * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, + * int count) + * + * We assume that there is a hash table in use (Hash != 0). + */ +_GLOBAL(flush_hash_pages) + tophys(r7,0) + + /* + * We disable interrupts here, even on UP, because we want + * the _PAGE_HASHPTE bit to be a reliable indication of + * whether the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + /* First find a PTE in the range that has _PAGE_HASHPTE set */ + rlwimi r5,r4,22,20,29 +1: lwz r0,0(r5) + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 2f + ble cr1,19f + addi r4,r4,0x1000 + addi r5,r5,4 + addi r6,r6,-1 + b 1b + + /* Convert context and va to VSID */ +2: mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note code below trims to 24 bits */ + + /* Construct the high word of the PPC-style PTE (r11) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r11,r3,12 /* shift vsid into position */ + rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r11) /* set V (valid) bit */ + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l + rlwinm r8,r1,0,0,18 + add r8,r8,r7 + lwz r8,TI_CPU(r8) + oris r8,r8,9 +10: lwarx r0,0,r9 + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Check the _PAGE_HASHPTE bit in the linux PTE. If it is + * already clear, we're done (for this pte). If not, + * clear it (atomically) and proceed. -- paulus. + */ +33: lwarx r8,0,r5 /* fetch the pte */ + andi. r0,r8,_PAGE_HASHPTE + beq 8f /* done if HASHPTE is already clear */ + rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + stwcx. r8,0,r5 /* update the pte */ + bne- 33b + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(flush_hash_patch_A) + addis r8,r7,Hash_base@h /* base address of hash table */ + rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r8,r0,r8 /* make primary hash */ + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + li r0,8 /* PTEs/group */ + mtctr r0 + addi r12,r8,-PTE_SIZE +1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ + CMPPTE 0,r0,r11 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ 3f + + /* Search the secondary PTEG for a matching PTE */ + ori r11,r11,PTE_H /* set H (secondary hash) bit */ + li r0,8 /* PTEs/group */ +_GLOBAL(flush_hash_patch_B) + xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ + xori r12,r12,(-PTEG_SIZE & 0xffff) + addi r12,r12,-PTE_SIZE + mtctr r0 +2: LDPTEu r0,PTE_SIZE(r12) + CMPPTE 0,r0,r11 + bdnzf 2,2b + xori r11,r11,PTE_H /* clear H again */ + bne- 4f /* should rarely fail to find it */ + +3: li r0,0 + STPTE r0,0(r12) /* invalidate entry */ +4: sync + tlbie r4 /* in hw tlb too */ + sync + +8: ble cr1,9f /* if all ptes checked */ +81: addi r6,r6,-1 + addi r5,r5,4 /* advance to next pte */ + addi r4,r4,0x1000 + lwz r0,0(r5) /* check next pte */ + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 33b + bgt cr1,81b + +9: +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + +19: mtmsr r10 + SYNC_601 + isync + blr diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c deleted file mode 100644 index bf13c14e66b..00000000000 --- a/arch/powerpc/mm/init.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mmu_decl.h" - -#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) -/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ -#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) -#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" -#endif -#endif -#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -unsigned long total_memory; -unsigned long total_lowmem; - -unsigned long ppc_memstart; -unsigned long ppc_memoffset = PAGE_OFFSET; - -int boot_mapsize; -#ifdef CONFIG_PPC_PMAC -unsigned long agp_special_page; -#endif - -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); -#endif - -void MMU_init(void); - -/* XXX should be in current.h -- paulus */ -extern struct task_struct *current_set[NR_CPUS]; - -char *klimit = _end; -struct device_node *memory_node; - -extern int init_bootmem_done; - -/* - * this tells the system to map all of ram with the segregs - * (i.e. page tables) instead of the bats. - * -- Cort - */ -int __map_without_bats; -int __map_without_ltlbs; - -/* max amount of low RAM to map in */ -unsigned long __max_low_memory = MAX_LOW_MEM; - -/* - * limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -unsigned long __initial_memory_limit = 0x10000000; - -/* - * Check for command-line options that affect what MMU_init will do. - */ -void MMU_setup(void) -{ - /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { - __map_without_bats = 1; - } - - if (strstr(cmd_line, "noltlbs")) { - __map_without_ltlbs = 1; - } -} - -/* - * MMU_init sets up the basic memory mappings for the kernel, - * including both RAM and possibly some I/O regions, - * and sets up the page tables and the MMU hardware ready to go. - */ -void __init MMU_init(void) -{ - if (ppc_md.progress) - ppc_md.progress("MMU:enter", 0x111); - - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit = 0x01000000; - - /* parse args from command line */ - MMU_setup(); - - if (lmb.memory.cnt > 1) { - lmb.memory.cnt = 1; - lmb_analyze(); - printk(KERN_WARNING "Only using first contiguous memory region"); - } - - total_memory = lmb_end_of_DRAM(); - total_lowmem = total_memory; - -#ifdef CONFIG_FSL_BOOKE - /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB - * entries, so we need to adjust lowmem to match the amount we can map - * in the fixed entries */ - adjust_total_lowmem(); -#endif /* CONFIG_FSL_BOOKE */ - if (total_lowmem > __max_low_memory) { - total_lowmem = __max_low_memory; -#ifndef CONFIG_HIGHMEM - total_memory = total_lowmem; -#endif /* CONFIG_HIGHMEM */ - } - - /* Initialize the MMU hardware */ - if (ppc_md.progress) - ppc_md.progress("MMU:hw init", 0x300); - MMU_init_hw(); - - /* Map in all of RAM starting at KERNELBASE */ - if (ppc_md.progress) - ppc_md.progress("MMU:mapin", 0x301); - mapin_ram(); - -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; - - /* Map in I/O resources */ - if (ppc_md.progress) - ppc_md.progress("MMU:setio", 0x302); - if (ppc_md.setup_io_mappings) - ppc_md.setup_io_mappings(); - - /* Initialize the context management stuff */ - mmu_context_init(); - - if (ppc_md.progress) - ppc_md.progress("MMU:exit", 0x211); - -#ifdef CONFIG_BOOTX_TEXT - /* By default, we are no longer mapped */ - boot_text_mapped = 0; - /* Must be done last, or ppc_md.progress will die. */ - map_boot_text(); -#endif -} - -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit)); - } - return p; -} - -/* Free up now-unused memory */ -static void free_sec(unsigned long start, unsigned long end, const char *name) -{ - unsigned long cnt = 0; - - while (start < end) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - cnt++; - start += PAGE_SIZE; - } - if (cnt) { - printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); - totalram_pages += cnt; - } -} - -void free_initmem(void) -{ -#define FREESEC(TYPE) \ - free_sec((unsigned long)(&__ ## TYPE ## _begin), \ - (unsigned long)(&__ ## TYPE ## _end), \ - #TYPE); - - printk ("Freeing unused kernel memory:"); - FREESEC(init); - printk("\n"); - ppc_md.progress = NULL; -#undef FREESEC -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c deleted file mode 100644 index c0ce6a7af3c..00000000000 --- a/arch/powerpc/mm/init64.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif - -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -unsigned long _SDR1=0; -unsigned long _ASR=0; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -/* - * Do very early mm setup. - */ -void __init mm_init_ppc64(void) -{ -#ifndef CONFIG_PPC_ISERIES - unsigned long i; -#endif - - ppc64_boot_msg(0x100, "MM Init"); - - /* This is the story of the IO hole... please, keep seated, - * unfortunately, we are out of oxygen masks at the moment. - * So we need some rough way to tell where your big IO hole - * is. On pmac, it's between 2G and 4G, on POWER3, it's around - * that area as well, on POWER4 we don't have one, etc... - * We need that as a "hint" when sizing the TCE table on POWER3 - * So far, the simplest way that seem work well enough for us it - * to just assume that the first discontinuity in our physical - * RAM layout is the IO hole. That may not be correct in the future - * (and isn't on iSeries but then we don't care ;) - */ - -#ifndef CONFIG_PPC_ISERIES - for (i = 1; i < lmb.memory.cnt; i++) { - unsigned long base, prevbase, prevsize; - - prevbase = lmb.memory.region[i-1].base; - prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].base; - if (base > (prevbase + prevsize)) { - io_hole_start = prevbase + prevsize; - io_hole_size = base - (prevbase + prevsize); - break; - } - } -#endif /* CONFIG_PPC_ISERIES */ - if (io_hole_start) - printk("IO Hole assumed to be %lx -> %lx\n", - io_hole_start, io_hole_start + io_hole_size - 1); - - ppc64_boot_msg(0x100, "MM Init Done"); -} - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)__init_begin; - for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { - memset((void *)addr, 0xcc, PAGE_SIZE); - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - free_page(addr); - totalram_pages++; - } - printk ("Freeing unused kernel memory: %luk freed\n", - ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -static struct kcore_list kcore_vmem; - -static int __init setup_kcore(void) -{ - int i; - - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; - struct kcore_list *kcore_mem; - - base = lmb.memory.region[i].base; - size = lmb.memory.region[i].size; - - /* GFP_ATOMIC to avoid might_sleep warnings during boot */ - kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); - if (!kcore_mem) - panic("mem_init: kmalloc failed\n"); - - kclist_add(kcore_mem, __va(base), size); - } - - kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); - - return 0; -} -module_init(setup_kcore); - -void __iomem * reserve_phb_iospace(unsigned long size) -{ - void __iomem *virt_addr; - - if (phbs_io_bot >= IMALLOC_BASE) - panic("reserve_phb_iospace(): phb io space overflow\n"); - - virt_addr = (void __iomem *) phbs_io_bot; - phbs_io_bot += size; - - return virt_addr; -} - -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - -static const int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; - -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; - -void pgtable_cache_init(void) -{ - int i; - - BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); - BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); - BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); - BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_HWCACHE_ALIGN - | SLAB_MUST_HWCACHE_ALIGN, - zero_ctor, - NULL); - if (! pgtable_cache[i]) - panic("pgtable_cache_init(): could not create %s!\n", - name); - } -} - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, - unsigned long size, pgprot_t vma_prot) -{ - if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); - - if (!page_is_ram(addr >> PAGE_SHIFT)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); - return vma_prot; -} -EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c new file mode 100644 index 00000000000..bf13c14e66b --- /dev/null +++ b/arch/powerpc/mm/init_32.c @@ -0,0 +1,258 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mmu_decl.h" + +#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) +/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ +#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) +#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" +#endif +#endif +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +unsigned long total_memory; +unsigned long total_lowmem; + +unsigned long ppc_memstart; +unsigned long ppc_memoffset = PAGE_OFFSET; + +int boot_mapsize; +#ifdef CONFIG_PPC_PMAC +unsigned long agp_special_page; +#endif + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + +void MMU_init(void); + +/* XXX should be in current.h -- paulus */ +extern struct task_struct *current_set[NR_CPUS]; + +char *klimit = _end; +struct device_node *memory_node; + +extern int init_bootmem_done; + +/* + * this tells the system to map all of ram with the segregs + * (i.e. page tables) instead of the bats. + * -- Cort + */ +int __map_without_bats; +int __map_without_ltlbs; + +/* max amount of low RAM to map in */ +unsigned long __max_low_memory = MAX_LOW_MEM; + +/* + * limit of what is accessible with initial MMU setup - + * 256MB usually, but only 16MB on 601. + */ +unsigned long __initial_memory_limit = 0x10000000; + +/* + * Check for command-line options that affect what MMU_init will do. + */ +void MMU_setup(void) +{ + /* Check for nobats option (used in mapin_ram). */ + if (strstr(cmd_line, "nobats")) { + __map_without_bats = 1; + } + + if (strstr(cmd_line, "noltlbs")) { + __map_without_ltlbs = 1; + } +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ +void __init MMU_init(void) +{ + if (ppc_md.progress) + ppc_md.progress("MMU:enter", 0x111); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + __initial_memory_limit = 0x01000000; + + /* parse args from command line */ + MMU_setup(); + + if (lmb.memory.cnt > 1) { + lmb.memory.cnt = 1; + lmb_analyze(); + printk(KERN_WARNING "Only using first contiguous memory region"); + } + + total_memory = lmb_end_of_DRAM(); + total_lowmem = total_memory; + +#ifdef CONFIG_FSL_BOOKE + /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB + * entries, so we need to adjust lowmem to match the amount we can map + * in the fixed entries */ + adjust_total_lowmem(); +#endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { + total_lowmem = __max_low_memory; +#ifndef CONFIG_HIGHMEM + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } + + /* Initialize the MMU hardware */ + if (ppc_md.progress) + ppc_md.progress("MMU:hw init", 0x300); + MMU_init_hw(); + + /* Map in all of RAM starting at KERNELBASE */ + if (ppc_md.progress) + ppc_md.progress("MMU:mapin", 0x301); + mapin_ram(); + +#ifdef CONFIG_HIGHMEM + ioremap_base = PKMAP_BASE; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Map in I/O resources */ + if (ppc_md.progress) + ppc_md.progress("MMU:setio", 0x302); + if (ppc_md.setup_io_mappings) + ppc_md.setup_io_mappings(); + + /* Initialize the context management stuff */ + mmu_context_init(); + + if (ppc_md.progress) + ppc_md.progress("MMU:exit", 0x211); + +#ifdef CONFIG_BOOTX_TEXT + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + /* Must be done last, or ppc_md.progress will die. */ + map_boot_text(); +#endif +} + +/* This is only called until mem_init is done. */ +void __init *early_get_page(void) +{ + void *p; + + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + __initial_memory_limit)); + } + return p; +} + +/* Free up now-unused memory */ +static void free_sec(unsigned long start, unsigned long end, const char *name) +{ + unsigned long cnt = 0; + + while (start < end) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + cnt++; + start += PAGE_SIZE; + } + if (cnt) { + printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); + totalram_pages += cnt; + } +} + +void free_initmem(void) +{ +#define FREESEC(TYPE) \ + free_sec((unsigned long)(&__ ## TYPE ## _begin), \ + (unsigned long)(&__ ## TYPE ## _end), \ + #TYPE); + + printk ("Freeing unused kernel memory:"); + FREESEC(init); + printk("\n"); + ppc_md.progress = NULL; +#undef FREESEC +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c new file mode 100644 index 00000000000..c0ce6a7af3c --- /dev/null +++ b/arch/powerpc/mm/init_64.c @@ -0,0 +1,259 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... please, keep seated, + * unfortunately, we are out of oxygen masks at the moment. + * So we need some rough way to tell where your big IO hole + * is. On pmac, it's between 2G and 4G, on POWER3, it's around + * that area as well, on POWER4 we don't have one, etc... + * We need that as a "hint" when sizing the TCE table on POWER3 + * So far, the simplest way that seem work well enough for us it + * to just assume that the first discontinuity in our physical + * RAM layout is the IO hole. That may not be correct in the future + * (and isn't on iSeries but then we don't care ;) + */ + +#ifndef CONFIG_PPC_ISERIES + for (i = 1; i < lmb.memory.cnt; i++) { + unsigned long base, prevbase, prevsize; + + prevbase = lmb.memory.region[i-1].base; + prevsize = lmb.memory.region[i-1].size; + base = lmb.memory.region[i].base; + if (base > (prevbase + prevsize)) { + io_hole_start = prevbase + prevsize; + io_hole_size = base - (prevbase + prevsize); + break; + } + } +#endif /* CONFIG_PPC_ISERIES */ + if (io_hole_start) + printk("IO Hole assumed to be %lx -> %lx\n", + io_hole_start, io_hole_start + io_hole_size - 1); + + ppc64_boot_msg(0x100, "MM Init Done"); +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)__init_begin; + for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { + memset((void *)addr, 0xcc, PAGE_SIZE); + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %luk freed\n", + ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +static struct kcore_list kcore_vmem; + +static int __init setup_kcore(void) +{ + int i; + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base, size; + struct kcore_list *kcore_mem; + + base = lmb.memory.region[i].base; + size = lmb.memory.region[i].size; + + /* GFP_ATOMIC to avoid might_sleep warnings during boot */ + kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); + if (!kcore_mem) + panic("mem_init: kmalloc failed\n"); + + kclist_add(kcore_mem, __va(base), size); + } + + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + + return 0; +} +module_init(setup_kcore); + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + memset(addr, 0, kmem_cache_size(cache)); +} + +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + +void pgtable_cache_init(void) +{ + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c deleted file mode 100644 index ef765a84433..00000000000 --- a/arch/powerpc/mm/mem64.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * This is called by /dev/mem to know if a given address has to - * be mapped non-cacheable or not - */ -int page_is_ram(unsigned long pfn) -{ - int i; - unsigned long paddr = (pfn << PAGE_SHIFT); - - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base; - - base = lmb.memory.region[i].base; - - if ((paddr >= base) && - (paddr < (base + lmb.memory.region[i].size))) { - return 1; - } - } - - return 0; -} -EXPORT_SYMBOL(page_is_ram); - -pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, - unsigned long size, pgprot_t vma_prot) -{ - if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); - - if (!page_is_ram(addr >> PAGE_SHIFT)) - vma_prot = __pgprot(pgprot_val(vma_prot) - | _PAGE_GUARDED | _PAGE_NO_CACHE); - return vma_prot; -} -EXPORT_SYMBOL(phys_mem_access_prot); - -void show_mem(void) -{ - unsigned long total = 0, reserved = 0; - unsigned long shared = 0, cached = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat_page_nr(pgdat, i); - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - } - printk("%ld pages of RAM\n", total); - printk("%ld reserved pages\n", reserved); - printk("%ld pages shared\n", shared); - printk("%ld pages swap cached\n", cached); -} - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. - */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* - * We shouldnt have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &pg->flags)) - clear_bit(PG_arch_1, &pg->flags); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - - /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &pg->flags)) - clear_bit(PG_arch_1, &pg->flags); -} - -void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); -} -EXPORT_SYMBOL(flush_icache_user_range); - -/* - * This is called at the end of handling a user page fault, when the - * fault has been handled by updating a PTE in the linux page tables. - * We use it to preload an HPTE into the hash table corresponding to - * the updated linux PTE. - * - * This must always be called with the mm->page_table_lock held - */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, - pte_t pte) -{ - unsigned long vsid; - void *pgdir; - pte_t *ptep; - int local = 0; - cpumask_t tmp; - unsigned long flags; - - /* handle i-cache coherency */ - if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && - !cpu_has_feature(CPU_FTR_NOEXECUTE)) { - unsigned long pfn = pte_pfn(pte); - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page) - && !test_bit(PG_arch_1, &page->flags)) { - __flush_dcache_icache(page_address(page)); - set_bit(PG_arch_1, &page->flags); - } - } - } - - /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ - if (!pte_young(pte)) - return; - - pgdir = vma->vm_mm->pgd; - if (pgdir == NULL) - return; - - ptep = find_linux_pte(pgdir, ea); - if (!ptep) - return; - - vsid = get_vsid(vma->vm_mm->context.id, ea); - - local_irq_save(flags); - tmp = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) - local = 1; - - __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, - 0x300, local); - local_irq_restore(flags); -} diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c deleted file mode 100644 index a8816e0f6a8..00000000000 --- a/arch/powerpc/mm/mmu_context.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU substantially follows the - * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include - -#include -#include - -mm_context_t next_mmu_context; -unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; -#ifdef FEW_CONTEXTS -atomic_t nr_free_contexts; -struct mm_struct *context_mm[LAST_CONTEXT+1]; -void steal_context(void); -#endif /* FEW_CONTEXTS */ - -/* - * Initialize the context management stuff. - */ -void __init -mmu_context_init(void) -{ - /* - * Some processors have too few contexts to reserve one for - * init_mm, and require using context 0 for a normal task. - * Other processors reserve the use of context zero for the kernel. - * This code assumes FIRST_CONTEXT < 32. - */ - context_map[0] = (1 << FIRST_CONTEXT) - 1; - next_mmu_context = FIRST_CONTEXT; -#ifdef FEW_CONTEXTS - atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); -#endif /* FEW_CONTEXTS */ -} - -#ifdef FEW_CONTEXTS -/* - * Steal a context from a task that has one at the moment. - * This is only used on 8xx and 4xx and we presently assume that - * they don't do SMP. If they do then this will have to check - * whether the MM we steal is in use. - * We also assume that this is only used on systems that don't - * use an MMU hash table - this is true for 8xx and 4xx. - * This isn't an LRU system, it just frees up each context in - * turn (sort-of pseudo-random replacement :). This would be the - * place to implement an LRU scheme if anyone was motivated to do it. - * -- paulus - */ -void -steal_context(void) -{ - struct mm_struct *mm; - - /* free up context `next_mmu_context' */ - /* if we shouldn't free context 0, don't... */ - if (next_mmu_context < FIRST_CONTEXT) - next_mmu_context = FIRST_CONTEXT; - mm = context_mm[next_mmu_context]; - flush_tlb_mm(mm); - destroy_context(mm); -} -#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c deleted file mode 100644 index 714a84dd8d5..00000000000 --- a/arch/powerpc/mm/mmu_context64.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * MMU context allocation for 64-bit kernels. - * - * Copyright (C) 2004 Anton Blanchard, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static DEFINE_SPINLOCK(mmu_context_lock); -static DEFINE_IDR(mmu_context_idr); - -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int index; - int err; - -again: - if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(&mmu_context_lock); - err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); - spin_unlock(&mmu_context_lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_CONTEXT) { - idr_remove(&mmu_context_idr, index); - return -ENOMEM; - } - - mm->context.id = index; - - return 0; -} - -void destroy_context(struct mm_struct *mm) -{ - spin_lock(&mmu_context_lock); - idr_remove(&mmu_context_idr, mm->context.id); - spin_unlock(&mmu_context_lock); - - mm->context.id = NO_CONTEXT; -} diff --git a/arch/powerpc/mm/mmu_context_32.c b/arch/powerpc/mm/mmu_context_32.c new file mode 100644 index 00000000000..a8816e0f6a8 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_32.c @@ -0,0 +1,86 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +#ifdef FEW_CONTEXTS +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; +void steal_context(void); +#endif /* FEW_CONTEXTS */ + +/* + * Initialize the context management stuff. + */ +void __init +mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +#ifdef FEW_CONTEXTS + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +#endif /* FEW_CONTEXTS */ +} + +#ifdef FEW_CONTEXTS +/* + * Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +void +steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} +#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c new file mode 100644 index 00000000000..714a84dd8d5 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_64.c @@ -0,0 +1,63 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c deleted file mode 100644 index 5792e533916..00000000000 --- a/arch/powerpc/mm/pgtable.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * This file contains the routines setting up the linux page tables. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mmu_decl.h" - -unsigned long ioremap_base; -unsigned long ioremap_bot; -int io_bat_index; - -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) -#define HAVE_BATS 1 -#endif - -#if defined(CONFIG_FSL_BOOKE) -#define HAVE_TLBCAM 1 -#endif - -extern char etext[], _stext[]; - -#ifdef CONFIG_SMP -extern void hash_page_sync(void); -#endif - -#ifdef HAVE_BATS -extern unsigned long v_mapped_by_bats(unsigned long va); -extern unsigned long p_mapped_by_bats(unsigned long pa); -void setbat(int index, unsigned long virt, unsigned long phys, - unsigned int size, int flags); - -#else /* !HAVE_BATS */ -#define v_mapped_by_bats(x) (0UL) -#define p_mapped_by_bats(x) (0UL) -#endif /* HAVE_BATS */ - -#ifdef HAVE_TLBCAM -extern unsigned int tlbcam_index; -extern unsigned long v_mapped_by_tlbcam(unsigned long va); -extern unsigned long p_mapped_by_tlbcam(unsigned long pa); -#else /* !HAVE_TLBCAM */ -#define v_mapped_by_tlbcam(x) (0UL) -#define p_mapped_by_tlbcam(x) (0UL) -#endif /* HAVE_TLBCAM */ - -#ifdef CONFIG_PTE_64BIT -/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ -#define PGDIR_ORDER 1 -#else -#define PGDIR_ORDER 0 -#endif - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *ret; - - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); - return ret; -} - -void pgd_free(pgd_t *pgd) -{ - free_pages((unsigned long)pgd, PGDIR_ORDER); -} - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - extern int mem_init_done; - extern void *early_get_page(void); - - if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - } else { - pte = (pte_t *)early_get_page(); - if (pte) - clear_page(pte); - } - return pte; -} - -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *ptepage; - -#ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; -#else - int flags = GFP_KERNEL | __GFP_REPEAT; -#endif - - ptepage = alloc_pages(flags, 0); - if (ptepage) - clear_highpage(ptepage); - return ptepage; -} - -void pte_free_kernel(pte_t *pte) -{ -#ifdef CONFIG_SMP - hash_page_sync(); -#endif - free_page((unsigned long)pte); -} - -void pte_free(struct page *ptepage) -{ -#ifdef CONFIG_SMP - hash_page_sync(); -#endif - __free_page(ptepage); -} - -#ifndef CONFIG_PHYS_64BIT -void __iomem * -ioremap(phys_addr_t addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE); -} -#else /* CONFIG_PHYS_64BIT */ -void __iomem * -ioremap64(unsigned long long addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE); -} - -void __iomem * -ioremap(phys_addr_t addr, unsigned long size) -{ - phys_addr_t addr64 = fixup_bigphys_addr(addr, size); - - return ioremap64(addr64, size); -} -#endif /* CONFIG_PHYS_64BIT */ - -void __iomem * -__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) -{ - unsigned long v, i; - phys_addr_t p; - int err; - - /* - * Choose an address to map it to. - * Once the vmalloc system is running, we use it. - * Before then, we use space going down from ioremap_base - * (ioremap_bot records where we're up to). - */ - p = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - p; - - /* - * If the address lies within the first 16 MB, assume it's in ISA - * memory space - */ - if (p < 16*1024*1024) - p += _ISA_MEM_BASE; - - /* - * Don't allow anybody to remap normal RAM that we're using. - * mem_init() sets high_memory so only do the check after that. - */ - if (mem_init_done && (p < virt_to_phys(high_memory))) { - printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, - __builtin_return_address(0)); - return NULL; - } - - if (size == 0) - return NULL; - - /* - * Is it already mapped? Perhaps overlapped by a previous - * BAT mapping. If the whole area is mapped then we're done, - * otherwise remap it since we want to keep the virt addrs for - * each request contiguous. - * - * We make the assumption here that if the bottom and top - * of the range we want are mapped then it's mapped to the - * same virt address (and this is contiguous). - * -- Cort - */ - if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) - goto out; - - if ((v = p_mapped_by_tlbcam(p))) - goto out; - - if (mem_init_done) { - struct vm_struct *area; - area = get_vm_area(size, VM_IOREMAP); - if (area == 0) - return NULL; - v = (unsigned long) area->addr; - } else { - v = (ioremap_bot -= size); - } - - if ((flags & _PAGE_PRESENT) == 0) - flags |= _PAGE_KERNEL; - if (flags & _PAGE_NO_CACHE) - flags |= _PAGE_GUARDED; - - /* - * Should check if it is a candidate for a BAT mapping - */ - - err = 0; - for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_page(v+i, p+i, flags); - if (err) { - if (mem_init_done) - vunmap((void *)v); - return NULL; - } - -out: - return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); -} - -void iounmap(volatile void __iomem *addr) -{ - /* - * If mapped by BATs then there is nothing to do. - * Calling vfree() generates a benign warning. - */ - if (v_mapped_by_bats((unsigned long)addr)) return; - - if (addr > high_memory && (unsigned long) addr < ioremap_bot) - vunmap((void *) (PAGE_MASK & (unsigned long)addr)); -} - -void __iomem *ioport_map(unsigned long port, unsigned int len) -{ - return (void __iomem *) (port + _IO_BASE); -} - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} -EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); - -int -map_page(unsigned long va, phys_addr_t pa, int flags) -{ - pmd_t *pd; - pte_t *pg; - int err = -ENOMEM; - - spin_lock(&init_mm.page_table_lock); - /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pgd_offset_k(va), va); - /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(&init_mm, pd, va); - if (pg != 0) { - err = 0; - set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); - if (mem_init_done) - flush_HPTE(0, va, pmd_val(*pd)); - } - spin_unlock(&init_mm.page_table_lock); - return err; -} - -/* - * Map in all of physical memory starting at KERNELBASE. - */ -void __init mapin_ram(void) -{ - unsigned long v, p, s, f; - - s = mmu_mapin_ram(); - v = KERNELBASE + s; - p = PPC_MEMSTART + s; - for (; s < total_lowmem; s += PAGE_SIZE) { - if ((char *) v >= _stext && (char *) v < etext) - f = _PAGE_RAM_TEXT; - else - f = _PAGE_RAM; - map_page(v, p, f); - v += PAGE_SIZE; - p += PAGE_SIZE; - } -} - -/* is x a power of 2? */ -#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) - -/* is x a power of 4? */ -#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) - -/* - * Set up a mapping for a block of I/O. - * virt, phys, size must all be page-aligned. - * This should only be called before ioremap is called. - */ -void __init io_block_mapping(unsigned long virt, phys_addr_t phys, - unsigned int size, int flags) -{ - int i; - - if (virt > KERNELBASE && virt < ioremap_bot) - ioremap_bot = ioremap_base = virt; - -#ifdef HAVE_BATS - /* - * Use a BAT for this if possible... - */ - if (io_bat_index < 2 && is_power_of_2(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - setbat(io_bat_index, virt, phys, size, flags); - ++io_bat_index; - return; - } -#endif /* HAVE_BATS */ - -#ifdef HAVE_TLBCAM - /* - * Use a CAM for this if possible... - */ - if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) - && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { - settlbcam(tlbcam_index, virt, phys, size, flags, 0); - ++tlbcam_index; - return; - } -#endif /* HAVE_TLBCAM */ - - /* No BATs available, put it in the page tables. */ - for (i = 0; i < size; i += PAGE_SIZE) - map_page(virt + i, phys + i, flags); -} - -/* Scan the real Linux page tables and return a PTE pointer for - * a virtual address in a context. - * Returns true (1) if PTE was found, zero otherwise. The pointer to - * the PTE pointer is unmodified if PTE is not found. - */ -int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pmd = pmd_offset(pgd, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - /* XXX caller needs to do pte_unmap, yuck */ - } - } - } - return(retval); -} - -/* Find physical address for this virtual address. Normally used by - * I/O functions, but anyone can call it. - */ -unsigned long iopa(unsigned long addr) -{ - unsigned long pa; - - /* I don't know why this won't work on PMacs or CHRP. It - * appears there is some bug, or there is some implicit - * mapping done not properly represented by BATs or in page - * tables.......I am actively working on resolving this, but - * can't hold up other stuff. -- Dan - */ - pte_t *pte; - struct mm_struct *mm; - - /* Check the BATs */ - pa = v_mapped_by_bats(addr); - if (pa) - return pa; - - /* Allow mapping of user addresses (within the thread) - * for DMA if necessary. - */ - if (addr < TASK_SIZE) - mm = current->mm; - else - mm = &init_mm; - - pa = 0; - if (get_pteptr(mm, addr, &pte)) { - pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); - pte_unmap(pte); - } - - return(pa); -} - -/* This is will find the virtual address for a physical one.... - * Swiped from APUS, could be dangerous :-). - * This is only a placeholder until I really find a way to make this - * work. -- Dan - */ -unsigned long -mm_ptov (unsigned long paddr) -{ - unsigned long ret; -#if 0 - if (paddr < 16*1024*1024) - ret = ZTWO_VADDR(paddr); - else { - int i; - - for (i = 0; i < kmap_chunk_count;){ - unsigned long phys = kmap_chunks[i++]; - unsigned long size = kmap_chunks[i++]; - unsigned long virt = kmap_chunks[i++]; - if (paddr >= phys - && paddr < (phys + size)){ - ret = virt + paddr - phys; - goto exit; - } - } - - ret = (unsigned long) __va(paddr); - } -exit: -#ifdef DEBUGPV - printk ("PTOV(%lx)=%lx\n", paddr, ret); -#endif -#else - ret = (unsigned long)paddr + KERNELBASE; -#endif - return ret; -} - diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c deleted file mode 100644 index 724f97e5dee..00000000000 --- a/arch/powerpc/mm/pgtable64.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * This file contains ioremap and related functions for 64-bit machines. - * - * Derived from arch/ppc64/mm/init.c - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * Dave Engebretsen - * Rework for PPC64 port. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif - -int mem_init_done; -unsigned long ioremap_bot = IMALLOC_BASE; -static unsigned long phbs_io_bot = PHBS_IO_BASE; - -extern pgd_t swapper_pg_dir[]; -extern struct task_struct *current_set[NR_CPUS]; - -unsigned long klimit = (unsigned long)_end; - -/* max amount of RAM to use */ -unsigned long __max_memory; - -/* info on what we think the IO hole is */ -unsigned long io_hole_start; -unsigned long io_hole_size; - -#ifdef CONFIG_PPC_ISERIES - -void __iomem *ioremap(unsigned long addr, unsigned long size) -{ - return (void __iomem *)addr; -} - -extern void __iomem *__ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - return (void __iomem *)addr; -} - -void iounmap(volatile void __iomem *addr) -{ - return; -} - -#else - -/* - * map_io_page currently only called by __ioremap - * map_io_page adds an entry to the ioremap page table - * and adds an entry to the HPT, possibly bolting it - */ -static int map_io_page(unsigned long ea, unsigned long pa, int flags) -{ - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long vsid; - - if (mem_init_done) { - spin_lock(&init_mm.page_table_lock); - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - ptep = pte_alloc_kernel(&init_mm, pmdp, ea); - if (!ptep) - return -ENOMEM; - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); - spin_unlock(&init_mm.page_table_lock); - } else { - unsigned long va, vpn, hash, hpteg; - - /* - * If the mm subsystem is not fully up, we cannot create a - * linux page table entry for this mapping. Simply bolt an - * entry in the hardware page table. - */ - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0xFFFFFFF); - vpn = va >> PAGE_SHIFT; - - hash = hpt_hash(vpn, 0); - - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - - /* Panic if a pte grpup is full */ - if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, - HPTE_V_BOLTED, - _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) - == -1) { - panic("map_io_page: could not insert mapping"); - } - } - return 0; -} - - -static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, - unsigned long ea, unsigned long size, - unsigned long flags) -{ - unsigned long i; - - if ((flags & _PAGE_PRESENT) == 0) - flags |= pgprot_val(PAGE_KERNEL); - - for (i = 0; i < size; i += PAGE_SIZE) - if (map_io_page(ea+i, pa+i, flags)) - return NULL; - - return (void __iomem *) (ea + (addr & ~PAGE_MASK)); -} - - -void __iomem * -ioremap(unsigned long addr, unsigned long size) -{ - return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); -} - -void __iomem * __ioremap(unsigned long addr, unsigned long size, - unsigned long flags) -{ - unsigned long pa, ea; - void __iomem *ret; - - /* - * Choose an address to map it to. - * Once the imalloc system is running, we use it. - * Before that, we map using addresses going - * up from ioremap_bot. imalloc will use - * the addresses from ioremap_bot through - * IMALLOC_END - * - */ - pa = addr & PAGE_MASK; - size = PAGE_ALIGN(addr + size) - pa; - - if (size == 0) - return NULL; - - if (mem_init_done) { - struct vm_struct *area; - area = im_get_free_area(size); - if (area == NULL) - return NULL; - ea = (unsigned long)(area->addr); - ret = __ioremap_com(addr, pa, ea, size, flags); - if (!ret) - im_free(area->addr); - } else { - ea = ioremap_bot; - ret = __ioremap_com(addr, pa, ea, size, flags); - if (ret) - ioremap_bot += size; - } - return ret; -} - -#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) - -int __ioremap_explicit(unsigned long pa, unsigned long ea, - unsigned long size, unsigned long flags) -{ - struct vm_struct *area; - void __iomem *ret; - - /* For now, require page-aligned values for pa, ea, and size */ - if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || - !IS_PAGE_ALIGNED(size)) { - printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); - return 1; - } - - if (!mem_init_done) { - /* Two things to consider in this case: - * 1) No records will be kept (imalloc, etc) that the region - * has been remapped - * 2) It won't be easy to iounmap() the region later (because - * of 1) - */ - ; - } else { - area = im_get_area(ea, size, - IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); - if (area == NULL) { - /* Expected when PHB-dlpar is in play */ - return 1; - } - if (ea != (unsigned long) area->addr) { - printk(KERN_ERR "unexpected addr return from " - "im_get_area\n"); - return 1; - } - } - - ret = __ioremap_com(pa, pa, ea, size, flags); - if (ret == NULL) { - printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); - return 1; - } - if (ret != (void *) ea) { - printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); - return 1; - } - - return 0; -} - -/* - * Unmap an IO region and remove it from imalloc'd list. - * Access to IO memory should be serialized by driver. - * This code is modeled after vmalloc code - unmap_vm_area() - * - * XXX what about calls before mem_init_done (ie python_countermeasures()) - */ -void iounmap(volatile void __iomem *token) -{ - void *addr; - - if (!mem_init_done) - return; - - addr = (void *) ((unsigned long __force) token & PAGE_MASK); - - im_free(addr); -} - -static int iounmap_subset_regions(unsigned long addr, unsigned long size) -{ - struct vm_struct *area; - - /* Check whether subsets of this region exist */ - area = im_get_area(addr, size, IM_REGION_SUPERSET); - if (area == NULL) - return 1; - - while (area) { - iounmap((void __iomem *) area->addr); - area = im_get_area(addr, size, - IM_REGION_SUPERSET); - } - - return 0; -} - -int iounmap_explicit(volatile void __iomem *start, unsigned long size) -{ - struct vm_struct *area; - unsigned long addr; - int rc; - - addr = (unsigned long __force) start & PAGE_MASK; - - /* Verify that the region either exists or is a subset of an existing - * region. In the latter case, split the parent region to create - * the exact region - */ - area = im_get_area(addr, size, - IM_REGION_EXISTS | IM_REGION_SUBSET); - if (area == NULL) { - /* Determine whether subset regions exist. If so, unmap */ - rc = iounmap_subset_regions(addr, size); - if (rc) { - printk(KERN_ERR - "%s() cannot unmap nonexistent range 0x%lx\n", - __FUNCTION__, addr); - return 1; - } - } else { - iounmap((void __iomem *) area->addr); - } - /* - * FIXME! This can't be right: - iounmap(area->addr); - * Maybe it should be "iounmap(area);" - */ - return 0; -} - -#endif - -EXPORT_SYMBOL(ioremap); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c new file mode 100644 index 00000000000..5792e533916 --- /dev/null +++ b/arch/powerpc/mm/pgtable_32.c @@ -0,0 +1,469 @@ +/* + * This file contains the routines setting up the linux page tables. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mmu_decl.h" + +unsigned long ioremap_base; +unsigned long ioremap_bot; +int io_bat_index; + +#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#define HAVE_BATS 1 +#endif + +#if defined(CONFIG_FSL_BOOKE) +#define HAVE_TLBCAM 1 +#endif + +extern char etext[], _stext[]; + +#ifdef CONFIG_SMP +extern void hash_page_sync(void); +#endif + +#ifdef HAVE_BATS +extern unsigned long v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(unsigned long pa); +void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); + +#else /* !HAVE_BATS */ +#define v_mapped_by_bats(x) (0UL) +#define p_mapped_by_bats(x) (0UL) +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM +extern unsigned int tlbcam_index; +extern unsigned long v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +#else /* !HAVE_TLBCAM */ +#define v_mapped_by_tlbcam(x) (0UL) +#define p_mapped_by_tlbcam(x) (0UL) +#endif /* HAVE_TLBCAM */ + +#ifdef CONFIG_PTE_64BIT +/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ +#define PGDIR_ORDER 1 +#else +#define PGDIR_ORDER 0 +#endif + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + return ret; +} + +void pgd_free(pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGDIR_ORDER); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +void pte_free_kernel(pte_t *pte) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + free_page((unsigned long)pte); +} + +void pte_free(struct page *ptepage) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + __free_page(ptepage); +} + +#ifndef CONFIG_PHYS_64BIT +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +#else /* CONFIG_PHYS_64BIT */ +void __iomem * +ioremap64(unsigned long long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + phys_addr_t addr64 = fixup_bigphys_addr(addr, size); + + return ioremap64(addr64, size); +} +#endif /* CONFIG_PHYS_64BIT */ + +void __iomem * +__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16*1024*1024) + p += _ISA_MEM_BASE; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if (mem_init_done && (p < virt_to_phys(high_memory))) { + printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * BAT mapping. If the whole area is mapped then we're done, + * otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). + * -- Cort + */ + if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) + goto out; + + if ((v = p_mapped_by_tlbcam(p))) + goto out; + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == 0) + return NULL; + v = (unsigned long) area->addr; + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v+i, p+i, flags); + if (err) { + if (mem_init_done) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. + * Calling vfree() generates a benign warning. + */ + if (v_mapped_by_bats((unsigned long)addr)) return; + + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vunmap((void *) (PAGE_MASK & (unsigned long)addr)); +} + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +int +map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + + spin_lock(&init_mm.page_table_lock); + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(&init_mm, pd, va); + if (pg != 0) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + } + spin_unlock(&init_mm.page_table_lock); + return err; +} + +/* + * Map in all of physical memory starting at KERNELBASE. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + s = mmu_mapin_ram(); + v = KERNELBASE + s; + p = PPC_MEMSTART + s; + for (; s < total_lowmem; s += PAGE_SIZE) { + if ((char *) v >= _stext && (char *) v < etext) + f = _PAGE_RAM_TEXT; + else + f = _PAGE_RAM; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* is x a power of 4? */ +#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > KERNELBASE && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + +#ifdef HAVE_BATS + /* + * Use a BAT for this if possible... + */ + if (io_bat_index < 2 && is_power_of_2(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + setbat(io_bat_index, virt, phys, size, flags); + ++io_bat_index; + return; + } +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM + /* + * Use a CAM for this if possible... + */ + if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + settlbcam(tlbcam_index, virt, phys, size, flags, 0); + ++tlbcam_index; + return; + } +#endif /* HAVE_TLBCAM */ + + /* No BATs available, put it in the page tables. */ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +int +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } + return(retval); +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + /* I don't know why this won't work on PMacs or CHRP. It + * appears there is some bug, or there is some implicit + * mapping done not properly represented by BATs or in page + * tables.......I am actively working on resolving this, but + * can't hold up other stuff. -- Dan + */ + pte_t *pte; + struct mm_struct *mm; + + /* Check the BATs */ + pa = v_mapped_by_bats(addr); + if (pa) + return pa; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) { + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + pte_unmap(pte); + } + + return(pa); +} + +/* This is will find the virtual address for a physical one.... + * Swiped from APUS, could be dangerous :-). + * This is only a placeholder until I really find a way to make this + * work. -- Dan + */ +unsigned long +mm_ptov (unsigned long paddr) +{ + unsigned long ret; +#if 0 + if (paddr < 16*1024*1024) + ret = ZTWO_VADDR(paddr); + else { + int i; + + for (i = 0; i < kmap_chunk_count;){ + unsigned long phys = kmap_chunks[i++]; + unsigned long size = kmap_chunks[i++]; + unsigned long virt = kmap_chunks[i++]; + if (paddr >= phys + && paddr < (phys + size)){ + ret = virt + paddr - phys; + goto exit; + } + } + + ret = (unsigned long) __va(paddr); + } +exit: +#ifdef DEBUGPV + printk ("PTOV(%lx)=%lx\n", paddr, ret); +#endif +#else + ret = (unsigned long)paddr + KERNELBASE; +#endif + return ret; +} + diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c new file mode 100644 index 00000000000..724f97e5dee --- /dev/null +++ b/arch/powerpc/mm/pgtable_64.c @@ -0,0 +1,357 @@ +/* + * This file contains ioremap and related functions for 64-bit machines. + * + * Derived from arch/ppc64/mm/init.c + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static int map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + if (!ptep) + return -ENOMEM; + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + spin_unlock(&init_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { + panic("map_io_page: could not insert mapping"); + } + } + return 0; +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + + for (i = 0; i < size; i += PAGE_SIZE) + if (map_io_page(ea+i, pa+i, flags)) + return NULL; + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + unsigned long pa, ea; + void __iomem *ret; + + /* + * Choose an address to map it to. + * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); + } else { + ea = ioremap_bot; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; + } + return ret; +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + void __iomem *ret; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from " + "im_get_area\n"); + return 1; + } + } + + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!mem_init_done) + return; + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + im_free(addr); +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c deleted file mode 100644 index cef9e83cc7e..00000000000 --- a/arch/powerpc/mm/ppc_mmu.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * This file contains the routines for handling the MMU on those - * PowerPC implementations where the MMU substantially follows the - * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "mmu_decl.h" - -PTE *Hash, *Hash_end; -unsigned long Hash_size, Hash_mask; -unsigned long _SDR1; - -union ubat { /* BAT register values to be loaded */ - BAT bat; -#ifdef CONFIG_PPC64BRIDGE - u64 word[2]; -#else - u32 word[2]; -#endif -} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ - -struct batrange { /* stores address ranges mapped by BATs */ - unsigned long start; - unsigned long limit; - unsigned long phys; -} bat_addrs[4]; - -/* - * Return PA for this VA if it is mapped by a BAT, or 0 - */ -unsigned long v_mapped_by_bats(unsigned long va) -{ - int b; - for (b = 0; b < 4; ++b) - if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) - return bat_addrs[b].phys + (va - bat_addrs[b].start); - return 0; -} - -/* - * Return VA for a given PA or 0 if not mapped - */ -unsigned long p_mapped_by_bats(unsigned long pa) -{ - int b; - for (b = 0; b < 4; ++b) - if (pa >= bat_addrs[b].phys - && pa < (bat_addrs[b].limit-bat_addrs[b].start) - +bat_addrs[b].phys) - return bat_addrs[b].start+(pa-bat_addrs[b].phys); - return 0; -} - -unsigned long __init mmu_mapin_ram(void) -{ -#ifdef CONFIG_POWER4 - return 0; -#else - unsigned long tot, bl, done; - unsigned long max_size = (256<<20); - unsigned long align; - - if (__map_without_bats) - return 0; - - /* Set up BAT2 and if necessary BAT3 to cover RAM. */ - - /* Make sure we don't map a block larger than the - smallest alignment of the physical address. */ - /* alignment of PPC_MEMSTART */ - align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; - /* set BAT block size to MIN(max_size, align) */ - if (align && align < max_size) - max_size = align; - - tot = total_lowmem; - for (bl = 128<<10; bl < max_size; bl <<= 1) { - if (bl * 2 > tot) - break; - } - - setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; - if ((done < tot) && !bat_addrs[3].limit) { - /* use BAT3 to cover a bit more */ - tot -= done; - for (bl = 128<<10; bl < max_size; bl <<= 1) - if (bl * 2 > tot) - break; - setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); - done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; - } - - return done; -#endif -} - -/* - * Set up one of the I/D BAT (block address translation) register pairs. - * The parameters are not checked; in particular size must be a power - * of 2 between 128k and 256M. - */ -void __init setbat(int index, unsigned long virt, unsigned long phys, - unsigned int size, int flags) -{ - unsigned int bl; - int wimgxpp; - union ubat *bat = BATS[index]; - - if (((flags & _PAGE_NO_CACHE) == 0) && - cpu_has_feature(CPU_FTR_NEED_COHERENT)) - flags |= _PAGE_COHERENT; - - bl = (size >> 17) - 1; - if (PVR_VER(mfspr(SPRN_PVR)) != 1) { - /* 603, 604, etc. */ - /* Do DBAT first */ - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT | _PAGE_GUARDED); - wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].word[1] = phys | wimgxpp; -#ifndef CONFIG_KGDB /* want user access for breakpoints */ - if (flags & _PAGE_USER) -#endif - bat[1].bat.batu.vp = 1; - if (flags & _PAGE_GUARDED) { - /* G bit must be zero in IBATs */ - bat[0].word[0] = bat[0].word[1] = 0; - } else { - /* make IBAT same as DBAT */ - bat[0] = bat[1]; - } - } else { - /* 601 cpu */ - if (bl > BL_8M) - bl = BL_8M; - wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE - | _PAGE_COHERENT); - wimgxpp |= (flags & _PAGE_RW)? - ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->word[1] = phys | bl | 0x40; /* V=1 */ - } - - bat_addrs[index].start = virt; - bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; - bat_addrs[index].phys = phys; -} - -/* - * Initialize the hash table and patch the instructions in hashtable.S. - */ -void __init MMU_init_hw(void) -{ - unsigned int hmask, mb, mb2; - unsigned int n_hpteg, lg_n_hpteg; - - extern unsigned int hash_page_patch_A[]; - extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; - extern unsigned int hash_page[]; - extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; - - if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { - /* - * Put a blr (procedure return) instruction at the - * start of hash_page, since we can still get DSI - * exceptions on a 603. - */ - hash_page[0] = 0x4e800020; - flush_icache_range((unsigned long) &hash_page[0], - (unsigned long) &hash_page[1]); - return; - } - - if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); - -#ifdef CONFIG_PPC64BRIDGE -#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ -#define SDR1_LOW_BITS (lg_n_hpteg - 11) -#define MIN_N_HPTEG 2048 /* min 256kB hash table */ -#else -#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ -#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) -#define MIN_N_HPTEG 1024 /* min 64kB hash table */ -#endif - - /* - * Allow 1 HPTE (1/8 HPTEG) for each page of memory. - * This is less than the recommended amount, but then - * Linux ain't AIX. - */ - n_hpteg = total_memory / (PAGE_SIZE * 8); - if (n_hpteg < MIN_N_HPTEG) - n_hpteg = MIN_N_HPTEG; - lg_n_hpteg = __ilog2(n_hpteg); - if (n_hpteg & (n_hpteg - 1)) { - ++lg_n_hpteg; /* round up if not power of 2 */ - n_hpteg = 1 << lg_n_hpteg; - } - Hash_size = n_hpteg << LG_HPTEG_SIZE; - - /* - * Find some memory for the hash table. - */ - if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(lmb_alloc_base(Hash_size, Hash_size, - __initial_memory_limit)); - cacheable_memzero(Hash, Hash_size); - _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - - Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); - - printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", - total_memory >> 20, Hash_size >> 10, Hash); - - - /* - * Patch up the instructions in hashtable.S:create_hpte - */ - if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); - Hash_mask = n_hpteg - 1; - hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); - mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; - if (lg_n_hpteg > 16) - mb2 = 16 - LG_HPTEG_SIZE; - - hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); - hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); - hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; - hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; - - /* - * Ensure that the locations we've patched have been written - * out from the data cache and invalidated in the instruction - * cache, on those machines with split caches. - */ - flush_icache_range((unsigned long) &hash_page_patch_A[0], - (unsigned long) &hash_page_patch_C[1]); - - /* - * Patch up the instructions in hashtable.S:flush_hash_page - */ - flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) - | ((unsigned int)(Hash) >> 16); - flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); - flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); - flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; - flush_icache_range((unsigned long) &flush_hash_patch_A[0], - (unsigned long) &flush_hash_patch_B[1]); - - if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); -} diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c new file mode 100644 index 00000000000..cef9e83cc7e --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -0,0 +1,285 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mmu_decl.h" + +PTE *Hash, *Hash_end; +unsigned long Hash_size, Hash_mask; +unsigned long _SDR1; + +union ubat { /* BAT register values to be loaded */ + BAT bat; +#ifdef CONFIG_PPC64BRIDGE + u64 word[2]; +#else + u32 word[2]; +#endif +} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ + +struct batrange { /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} bat_addrs[4]; + +/* + * Return PA for this VA if it is mapped by a BAT, or 0 + */ +unsigned long v_mapped_by_bats(unsigned long va) +{ + int b; + for (b = 0; b < 4; ++b) + if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) + return bat_addrs[b].phys + (va - bat_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_bats(unsigned long pa) +{ + int b; + for (b = 0; b < 4; ++b) + if (pa >= bat_addrs[b].phys + && pa < (bat_addrs[b].limit-bat_addrs[b].start) + +bat_addrs[b].phys) + return bat_addrs[b].start+(pa-bat_addrs[b].phys); + return 0; +} + +unsigned long __init mmu_mapin_ram(void) +{ +#ifdef CONFIG_POWER4 + return 0; +#else + unsigned long tot, bl, done; + unsigned long max_size = (256<<20); + unsigned long align; + + if (__map_without_bats) + return 0; + + /* Set up BAT2 and if necessary BAT3 to cover RAM. */ + + /* Make sure we don't map a block larger than the + smallest alignment of the physical address. */ + /* alignment of PPC_MEMSTART */ + align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; + /* set BAT block size to MIN(max_size, align) */ + if (align && align < max_size) + max_size = align; + + tot = total_lowmem; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > tot) + break; + } + + setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + if ((done < tot) && !bat_addrs[3].limit) { + /* use BAT3 to cover a bit more */ + tot -= done; + for (bl = 128<<10; bl < max_size; bl <<= 1) + if (bl * 2 > tot) + break; + setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + } + + return done; +#endif +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + */ +void __init setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned int bl; + int wimgxpp; + union ubat *bat = BATS[index]; + + if (((flags & _PAGE_NO_CACHE) == 0) && + cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags |= _PAGE_COHERENT; + + bl = (size >> 17) - 1; + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, etc. */ + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].word[1] = phys | wimgxpp; +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) +#endif + bat[1].bat.batu.vp = 1; + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + bat[0].word[0] = bat[0].word[1] = 0; + } else { + /* make IBAT same as DBAT */ + bat[0] = bat[1]; + } + } else { + /* 601 cpu */ + if (bl > BL_8M) + bl = BL_8M; + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT); + wimgxpp |= (flags & _PAGE_RW)? + ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; + bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->word[1] = phys | bl | 0x40; /* V=1 */ + } + + bat_addrs[index].start = virt; + bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[index].phys = phys; +} + +/* + * Initialize the hash table and patch the instructions in hashtable.S. + */ +void __init MMU_init_hw(void) +{ + unsigned int hmask, mb, mb2; + unsigned int n_hpteg, lg_n_hpteg; + + extern unsigned int hash_page_patch_A[]; + extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; + extern unsigned int hash_page[]; + extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; + + if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + /* + * Put a blr (procedure return) instruction at the + * start of hash_page, since we can still get DSI + * exceptions on a 603. + */ + hash_page[0] = 0x4e800020; + flush_icache_range((unsigned long) &hash_page[0], + (unsigned long) &hash_page[1]); + return; + } + + if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); + +#ifdef CONFIG_PPC64BRIDGE +#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ +#define SDR1_LOW_BITS (lg_n_hpteg - 11) +#define MIN_N_HPTEG 2048 /* min 256kB hash table */ +#else +#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ +#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) +#define MIN_N_HPTEG 1024 /* min 64kB hash table */ +#endif + + /* + * Allow 1 HPTE (1/8 HPTEG) for each page of memory. + * This is less than the recommended amount, but then + * Linux ain't AIX. + */ + n_hpteg = total_memory / (PAGE_SIZE * 8); + if (n_hpteg < MIN_N_HPTEG) + n_hpteg = MIN_N_HPTEG; + lg_n_hpteg = __ilog2(n_hpteg); + if (n_hpteg & (n_hpteg - 1)) { + ++lg_n_hpteg; /* round up if not power of 2 */ + n_hpteg = 1 << lg_n_hpteg; + } + Hash_size = n_hpteg << LG_HPTEG_SIZE; + + /* + * Find some memory for the hash table. + */ + if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); + Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + __initial_memory_limit)); + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; + + Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); + + + /* + * Patch up the instructions in hashtable.S:create_hpte + */ + if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); + Hash_mask = n_hpteg - 1; + hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + mb2 = 16 - LG_HPTEG_SIZE; + + hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); + hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); + hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; + hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; + + /* + * Ensure that the locations we've patched have been written + * out from the data cache and invalidated in the instruction + * cache, on those machines with split caches. + */ + flush_icache_range((unsigned long) &hash_page_patch_A[0], + (unsigned long) &hash_page_patch_C[1]); + + /* + * Patch up the instructions in hashtable.S:flush_hash_page + */ + flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); + flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); + flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; + flush_icache_range((unsigned long) &flush_hash_patch_A[0], + (unsigned long) &flush_hash_patch_B[1]); + + if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); +} diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c deleted file mode 100644 index 6c3dc3c44c8..00000000000 --- a/arch/powerpc/mm/tlb.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * This file contains the routines for TLB flushing. - * On machines where the MMU uses a hash table to store virtual to - * physical translations, these routines flush entries from the - * hash table also. - * -- paulus - * - * Derived from arch/ppc/mm/init.c: - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) - * and Cort Dougan (PReP) (cort@cs.nmt.edu) - * Copyright (C) 1996 Paul Mackerras - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * Derived from "arch/i386/mm/init.c" - * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "mmu_decl.h" - -/* - * Called when unmapping pages to flush entries from the TLB/hash table. - */ -void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) -{ - unsigned long ptephys; - - if (Hash != 0) { - ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(mm->context, addr, ptephys, 1); - } -} - -/* - * Called by ptep_set_access_flags, must flush on CPUs for which the - * DSI handler can't just "fixup" the TLB on a write fault - */ -void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) -{ - if (Hash != 0) - return; - _tlbie(addr); -} - -/* - * Called at the end of a mmu_gather operation to make sure the - * TLB flush is completely done. - */ -void tlb_flush(struct mmu_gather *tlb) -{ - if (Hash == 0) { - /* - * 603 needs to flush the whole TLB here since - * it doesn't use a hash table. - */ - _tlbia(); - } -} - -/* - * TLB flushing: - * - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes kernel pages - * - * since the hardware hash table functions as an extension of the - * tlb as far as the linux tables are concerned, flush it too. - * -- Cort - */ - -/* - * 750 SMP is a Bad Idea because the 750 doesn't broadcast all - * the cache operations on the bus. Hence we need to use an IPI - * to get the other CPU(s) to invalidate their TLBs. - */ -#ifdef CONFIG_SMP_750 -#define FINISH_FLUSH smp_send_tlb_invalidate(0) -#else -#define FINISH_FLUSH do { } while (0) -#endif - -static void flush_range(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - pmd_t *pmd; - unsigned long pmd_end; - int count; - unsigned int ctx = mm->context; - - if (Hash == 0) { - _tlbia(); - return; - } - start &= PAGE_MASK; - if (start >= end) - return; - end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pgd_offset(mm, start), start); - for (;;) { - pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; - if (pmd_end > end) - pmd_end = end; - if (!pmd_none(*pmd)) { - count = ((pmd_end - start) >> PAGE_SHIFT) + 1; - flush_hash_pages(ctx, start, pmd_val(*pmd), count); - } - if (pmd_end == end) - break; - start = pmd_end + 1; - ++pmd; - } -} - -/* - * Flush kernel TLB entries in the given range - */ -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - flush_range(&init_mm, start, end); - FINISH_FLUSH; -} - -/* - * Flush all the (user) entries for the address space described by mm. - */ -void flush_tlb_mm(struct mm_struct *mm) -{ - struct vm_area_struct *mp; - - if (Hash == 0) { - _tlbia(); - return; - } - - for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) - flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); - FINISH_FLUSH; -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) -{ - struct mm_struct *mm; - pmd_t *pmd; - - if (Hash == 0) { - _tlbie(vmaddr); - return; - } - mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); - if (!pmd_none(*pmd)) - flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); - FINISH_FLUSH; -} - -/* - * For each address in the range, find the pte for the address - * and check _PAGE_HASHPTE bit; if it is set, find and destroy - * the corresponding HPTE. - */ -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_range(vma->vm_mm, start, end); - FINISH_FLUSH; -} diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c new file mode 100644 index 00000000000..6c3dc3c44c8 --- /dev/null +++ b/arch/powerpc/mm/tlb_32.c @@ -0,0 +1,183 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. + */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context, addr, ptephys, 1); + } +} + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pgd_offset(mm, start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *mp; + + if (Hash == 0) { + _tlbia(); + return; + } + + for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + FINISH_FLUSH; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct mm_struct *mm; + pmd_t *pmd; + + if (Hash == 0) { + _tlbie(vmaddr); + return; + } + mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; + pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + if (!pmd_none(*pmd)) + flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + FINISH_FLUSH; +} + +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_range(vma->vm_mm, start, end); + FINISH_FLUSH; +} -- cgit v1.2.3-70-g09d2