diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-book3e.c | 21 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 116 | ||||
-rw-r--r-- | arch/powerpc/mm/icswx.c | 273 | ||||
-rw-r--r-- | arch/powerpc/mm/icswx.h | 62 | ||||
-rw-r--r-- | arch/powerpc/mm/icswx_pid.c | 87 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/mmap_64.c | 14 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash64.c | 195 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 26 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 36 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 3 |
15 files changed, 589 insertions, 305 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index f60e006d90c..388b95e1a00 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -78,11 +78,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) "tlbwe %1,%3,%5\n" "tlbwe %0,%3,%6\n" : -#ifdef CONFIG_PPC47x - : "r" (PPC47x_TLB2_S_RWX), -#else : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), -#endif "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), "r" (entry), @@ -221,7 +217,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, { u64 size; -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL /* We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 991ee813d2a..3787b61f7d2 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -21,6 +21,8 @@ obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ mmu_context_hash$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_PPC_ICSWX) += icswx.o +obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 5efe8c96d37..2f0d1b032a8 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -44,6 +44,8 @@ #include <asm/siginfo.h> #include <mm/mmu_decl.h> +#include "icswx.h" + #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs) { @@ -143,6 +145,21 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ +#ifdef CONFIG_PPC_ICSWX + /* + * we need to do this early because this "data storage + * interrupt" does not update the DAR/DEAR so we don't want to + * look at it + */ + if (error_code & ICSWX_DSI_UCT) { + int ret; + + ret = acop_handle_fault(regs, address, error_code); + if (ret) + return ret; + } +#endif + if (notify_page_fault(regs)) return 0; diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 343ad0b8726..3bc700655fc 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -37,31 +37,32 @@ static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) return found; } -void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) +void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) { unsigned long mas1, mas2; u64 mas7_3; unsigned long psize, tsize, shift; unsigned long flags; + struct mm_struct *mm; #ifdef CONFIG_PPC_FSL_BOOK3E - int index, lz, ncams; - struct vm_area_struct *vma; + int index, ncams; #endif if (unlikely(is_kernel_addr(ea))) return; + mm = vma->vm_mm; + #ifdef CONFIG_PPC_MM_SLICES - psize = mmu_get_tsize(get_slice_psize(mm, ea)); - tsize = mmu_get_psize(psize); + psize = get_slice_psize(mm, ea); + tsize = mmu_get_tsize(psize); shift = mmu_psize_defs[psize].shift; #else - vma = find_vma(mm, ea); - psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */ - asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize)); - shift = 31 - lz; - tsize = 21 - lz; + psize = vma_mmu_pagesize(vma); + shift = __ilog2(psize); + tsize = shift - 10; #endif /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8558b572e55..a8b3cc7d90f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -29,22 +29,22 @@ unsigned int HPAGE_SHIFT; /* * Tracks gpages after the device tree is scanned and before the - * huge_boot_pages list is ready. On 64-bit implementations, this is - * just used to track 16G pages and so is a single array. 32-bit - * implementations may have more than one gpage size due to limitations - * of the memory allocators, so we need multiple arrays + * huge_boot_pages list is ready. On non-Freescale implementations, this is + * just used to track 16G pages and so is a single array. FSL-based + * implementations may have more than one gpage size, so we need multiple + * arrays */ -#ifdef CONFIG_PPC64 -#define MAX_NUMBER_GPAGES 1024 -static u64 gpage_freearray[MAX_NUMBER_GPAGES]; -static unsigned nr_gpages; -#else +#ifdef CONFIG_PPC_FSL_BOOK3E #define MAX_NUMBER_GPAGES 128 struct psize_gpages { u64 gpage_list[MAX_NUMBER_GPAGES]; unsigned int nr_gpages; }; static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; +#else +#define MAX_NUMBER_GPAGES 1024 +static u64 gpage_freearray[MAX_NUMBER_GPAGES]; +static unsigned nr_gpages; #endif static inline int shift_to_mmu_psize(unsigned int shift) @@ -115,12 +115,12 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, struct kmem_cache *cachep; pte_t *new; -#ifdef CONFIG_PPC64 - cachep = PGT_CACHE(pdshift - pshift); -#else +#ifdef CONFIG_PPC_FSL_BOOK3E int i; int num_hugepd = 1 << (pshift - pdshift); cachep = hugepte_cache; +#else + cachep = PGT_CACHE(pdshift - pshift); #endif new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); @@ -132,12 +132,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return -ENOMEM; spin_lock(&mm->page_table_lock); -#ifdef CONFIG_PPC64 - if (!hugepd_none(*hpdp)) - kmem_cache_free(cachep, new); - else - hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; -#else +#ifdef CONFIG_PPC_FSL_BOOK3E /* * We have multiple higher-level entries that point to the same * actual pte location. Fill in each as we go and backtrack on error. @@ -156,11 +151,28 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, hpdp->pd = 0; kmem_cache_free(cachep, new); } +#else + if (!hugepd_none(*hpdp)) + kmem_cache_free(cachep, new); + else + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; #endif spin_unlock(&mm->page_table_lock); return 0; } +/* + * These macros define how to determine which level of the page table holds + * the hpdp. + */ +#ifdef CONFIG_PPC_FSL_BOOK3E +#define HUGEPD_PGD_SHIFT PGDIR_SHIFT +#define HUGEPD_PUD_SHIFT PUD_SHIFT +#else +#define HUGEPD_PGD_SHIFT PUD_SHIFT +#define HUGEPD_PUD_SHIFT PMD_SHIFT +#endif + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; @@ -173,12 +185,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); - if (pshift >= PUD_SHIFT) { + + if (pshift >= HUGEPD_PGD_SHIFT) { hpdp = (hugepd_t *)pg; } else { pdshift = PUD_SHIFT; pu = pud_alloc(mm, pg, addr); - if (pshift >= PMD_SHIFT) { + if (pshift >= HUGEPD_PUD_SHIFT) { hpdp = (hugepd_t *)pu; } else { pdshift = PMD_SHIFT; @@ -198,7 +211,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy or bootmem allocator is setup. */ @@ -318,7 +331,7 @@ void __init reserve_hugetlb_gpages(void) } } -#else /* PPC64 */ +#else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy or bootmem allocator is setup. @@ -356,7 +369,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_FSL_BOOK3E #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -416,11 +429,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif unsigned long pdmask = ~((1UL << pdshift) - 1); unsigned int num_hugepd = 1; -#ifdef CONFIG_PPC64 - unsigned int shift = hugepd_shift(*hpdp); -#else - /* Note: On 32-bit the hpdp may be the first of several */ +#ifdef CONFIG_PPC_FSL_BOOK3E + /* Note: On fsl the hpdp may be the first of several */ num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); +#else + unsigned int shift = hugepd_shift(*hpdp); #endif start &= pdmask; @@ -438,10 +451,11 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif hpdp->pd = 0; tlb->need_flush = 1; -#ifdef CONFIG_PPC64 - pgtable_free_tlb(tlb, hugepte, pdshift - shift); -#else + +#ifdef CONFIG_PPC_FSL_BOOK3E hugepd_free(tlb, hugepte); +#else + pgtable_free_tlb(tlb, hugepte, pdshift - shift); #endif } @@ -454,14 +468,23 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start; start = addr; - pmd = pmd_offset(pud, addr); do { + pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) continue; +#ifdef CONFIG_PPC_FSL_BOOK3E + /* + * Increment next by the size of the huge mapping since + * there may be more than one entry at this level for a + * single hugepage, but all of them point to + * the same kmem cache that holds the hugepte. + */ + next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd)); +#endif free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, addr, next, floor, ceiling); - } while (pmd++, addr = next, addr != end); + } while (addr = next, addr != end); start &= PUD_MASK; if (start < floor) @@ -488,8 +511,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); do { + pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); if (!is_hugepd(pud)) { if (pud_none_or_clear_bad(pud)) @@ -497,10 +520,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); } else { +#ifdef CONFIG_PPC_FSL_BOOK3E + /* + * Increment next by the size of the huge mapping since + * there may be more than one entry at this level for a + * single hugepage, but all of them point to + * the same kmem cache that holds the hugepte. + */ + next = addr + (1 << hugepd_shift(*(hugepd_t *)pud)); +#endif free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, addr, next, floor, ceiling); } - } while (pud++, addr = next, addr != end); + } while (addr = next, addr != end); start &= PGDIR_MASK; if (start < floor) @@ -555,12 +587,12 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { -#ifdef CONFIG_PPC32 +#ifdef CONFIG_PPC_FSL_BOOK3E /* * Increment next by the size of the huge mapping since - * on 32-bit there may be more than one entry at the pgd - * level for a single hugepage, but all of them point to - * the same kmem cache that holds the hugepte. + * there may be more than one entry at the pgd level + * for a single hugepage, but all of them point to the + * same kmem cache that holds the hugepte. */ next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); #endif @@ -698,19 +730,17 @@ int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, return 1; } +#ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { -#ifdef CONFIG_PPC_MM_SLICES struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); -#else - return get_unmapped_area(file, addr, len, pgoff, flags); -#endif } +#endif unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { @@ -784,7 +814,7 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_FSL_BOOK3E struct kmem_cache *hugepte_cache; static int __init hugetlbpage_init(void) { diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c new file mode 100644 index 00000000000..5d9a59eaad9 --- /dev/null +++ b/arch/powerpc/mm/icswx.c @@ -0,0 +1,273 @@ +/* + * ICSWX and ACOP Management + * + * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/uaccess.h> + +#include "icswx.h" + +/* + * The processor and its L2 cache cause the icswx instruction to + * generate a COP_REQ transaction on PowerBus. The transaction has no + * address, and the processor does not perform an MMU access to + * authenticate the transaction. The command portion of the PowerBus + * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor + * Process ID (PID), which the coprocessor compares to the authorized + * LPID and PID held in the coprocessor, to determine if the process + * is authorized to generate the transaction. The data of the COP_REQ + * transaction is 128-byte or less in size and is placed in cacheable + * memory on a 128-byte cache line boundary. + * + * The task to use a coprocessor should use use_cop() to mark the use + * of the Coprocessor Type (CT) and context switching. On a server + * class processor, the PID register is used only for coprocessor + * management + * and so a coprocessor PID is allocated before + * executing icswx + * instruction. Drop_cop() is used to free the + * coprocessor PID. + * + * Example: + * Host Fabric Interface (HFI) is a PowerPC network coprocessor. + * Each HFI have multiple windows. Each HFI window serves as a + * network device sending to and receiving from HFI network. + * HFI immediate send function uses icswx instruction. The immediate + * send function allows small (single cache-line) packets be sent + * without using the regular HFI send FIFO and doorbell, which are + * much slower than immediate send. + * + * For each task intending to use HFI immediate send, the HFI driver + * calls use_cop() to obtain a coprocessor PID for the task. + * The HFI driver then allocate a free HFI window and save the + * coprocessor PID to the HFI window to allow the task to use the + * HFI window. + * + * The HFI driver repeatedly creates immediate send packets and + * issues icswx instruction to send data through the HFI window. + * The HFI compares the coprocessor PID in the CPU PID register + * to the PID held in the HFI window to determine if the transaction + * is allowed. + * + * When the task to release the HFI window, the HFI driver calls + * drop_cop() to release the coprocessor PID. + */ + +void switch_cop(struct mm_struct *next) +{ +#ifdef CONFIG_ICSWX_PID + mtspr(SPRN_PID, next->context.cop_pid); +#endif + mtspr(SPRN_ACOP, next->context.acop); +} + +/** + * Start using a coprocessor. + * @acop: mask of coprocessor to be used. + * @mm: The mm the coprocessor to associate with. Most likely current mm. + * + * Return a positive PID if successful. Negative errno otherwise. + * The returned PID will be fed to the coprocessor to determine if an + * icswx transaction is authenticated. + */ +int use_cop(unsigned long acop, struct mm_struct *mm) +{ + int ret; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return -ENODEV; + + if (!mm || !acop) + return -EINVAL; + + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); + spin_lock(mm->context.cop_lockp); + + ret = get_cop_pid(mm); + if (ret < 0) + goto out; + + /* update acop */ + mm->context.acop |= acop; + + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + +out: + spin_unlock(mm->context.cop_lockp); + spin_unlock(&mm->page_table_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(use_cop); + +/** + * Stop using a coprocessor. + * @acop: mask of coprocessor to be stopped. + * @mm: The mm the coprocessor associated with. + */ +void drop_cop(unsigned long acop, struct mm_struct *mm) +{ + int free_pid; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) + return; + + if (WARN_ON_ONCE(!mm)) + return; + + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); + spin_lock(mm->context.cop_lockp); + + mm->context.acop &= ~acop; + + free_pid = disable_cop_pid(mm); + sync_cop(mm); + + /* + * If this is a threaded process then there might be other threads + * running. We need to send an IPI to force them to pick up any + * change in PID and ACOP. + */ + if (atomic_read(&mm->mm_users) > 1) + smp_call_function(sync_cop, mm, 1); + + if (free_pid != COP_PID_NONE) + free_cop_pid(free_pid); + + spin_unlock(mm->context.cop_lockp); + spin_unlock(&mm->page_table_lock); +} +EXPORT_SYMBOL_GPL(drop_cop); + +static int acop_use_cop(int ct) +{ + /* todo */ + return -1; +} + +/* + * Get the instruction word at the NIP + */ +static u32 acop_get_inst(struct pt_regs *regs) +{ + u32 inst; + u32 __user *p; + + p = (u32 __user *)regs->nip; + if (!access_ok(VERIFY_READ, p, sizeof(*p))) + return 0; + + if (__get_user(inst, p)) + return 0; + + return inst; +} + +/** + * @regs: regsiters at time of interrupt + * @address: storage address + * @error_code: Fault code, usually the DSISR or ESR depending on + * processor type + * + * Return 0 if we are able to resolve the data storage fault that + * results from a CT miss in the ACOP register. + */ +int acop_handle_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + int ct; + u32 inst = 0; + + if (!cpu_has_feature(CPU_FTR_ICSWX)) { + pr_info("No coprocessors available"); + _exception(SIGILL, regs, ILL_ILLOPN, address); + } + + if (!user_mode(regs)) { + /* this could happen if the HV denies the + * kernel access, for now we just die */ + die("ICSWX from kernel failed", regs, SIGSEGV); + } + + /* Some implementations leave us a hint for the CT */ + ct = ICSWX_GET_CT_HINT(error_code); + if (ct < 0) { + /* we have to peek at the instruction word to figure out CT */ + u32 ccw; + u32 rs; + + inst = acop_get_inst(regs); + if (inst == 0) + return -1; + + rs = (inst >> (31 - 10)) & 0x1f; + ccw = regs->gpr[rs]; + ct = (ccw >> 16) & 0x3f; + } + + if (!acop_use_cop(ct)) + return 0; + + /* at this point the CT is unknown to the system */ + pr_warn("%s[%d]: Coprocessor %d is unavailable", + current->comm, current->pid, ct); + + /* get inst if we don't already have it */ + if (inst == 0) { + inst = acop_get_inst(regs); + if (inst == 0) + return -1; + } + + /* Check if the instruction is the "record form" */ + if (inst & 1) { + /* + * the instruction is "record" form so we can reject + * using CR0 + */ + regs->ccr &= ~(0xful << 28); + regs->ccr |= ICSWX_RC_NOT_FOUND << 28; + + /* Move on to the next instruction */ + regs->nip += 4; + } else { + /* + * There is no architected mechanism to report a bad + * CT so we could either SIGILL or report nothing. + * Since the non-record version should only bu used + * for "hints" or "don't care" we should probably do + * nothing. However, I could see how some people + * might want an SIGILL so it here if you want it. + */ +#ifdef CONFIG_PPC_ICSWX_USE_SIGILL + _exception(SIGILL, regs, ILL_ILLOPN, address); +#else + regs->nip += 4; +#endif + } + + return 0; +} +EXPORT_SYMBOL_GPL(acop_handle_fault); diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h new file mode 100644 index 00000000000..42176bd0884 --- /dev/null +++ b/arch/powerpc/mm/icswx.h @@ -0,0 +1,62 @@ +#ifndef _ARCH_POWERPC_MM_ICSWX_H_ +#define _ARCH_POWERPC_MM_ICSWX_H_ + +/* + * ICSWX and ACOP Management + * + * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/mmu_context.h> + +/* also used to denote that PIDs are not used */ +#define COP_PID_NONE 0 + +static inline void sync_cop(void *arg) +{ + struct mm_struct *mm = arg; + + if (mm == current->active_mm) + switch_cop(current->active_mm); +} + +#ifdef CONFIG_PPC_ICSWX_PID +extern int get_cop_pid(struct mm_struct *mm); +extern int disable_cop_pid(struct mm_struct *mm); +extern void free_cop_pid(int free_pid); +#else +#define get_cop_pid(m) (COP_PID_NONE) +#define disable_cop_pid(m) (COP_PID_NONE) +#define free_cop_pid(p) +#endif + +/* + * These are implementation bits for architected registers. If this + * ever becomes architecture the should be moved to reg.h et. al. + */ +/* UCT is the same bit for Server and Embedded */ +#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */ + +#ifdef CONFIG_PPC_BOOK3E +/* Embedded implementation gives us no hints as to what the CT is */ +#define ICSWX_GET_CT_HINT(x) (-1) +#else +/* Server implementation contains the CT value in the DSISR */ +#define ICSWX_DSISR_CTMASK 0x00003f00 +#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8) +#endif + +#define ICSWX_RC_STARTED 0x8 /* The request has been started */ +#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */ +#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */ +#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */ + +extern int acop_handle_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code); +#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */ diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c new file mode 100644 index 00000000000..91e30eb7d05 --- /dev/null +++ b/arch/powerpc/mm/icswx_pid.c @@ -0,0 +1,87 @@ +/* + * ICSWX and ACOP/PID Management + * + * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/module.h> +#include "icswx.h" + +#define COP_PID_MIN (COP_PID_NONE + 1) +#define COP_PID_MAX (0xFFFF) + +static DEFINE_SPINLOCK(mmu_context_acop_lock); +static DEFINE_IDA(cop_ida); + +static int new_cop_pid(struct ida *ida, int min_id, int max_id, + spinlock_t *lock) +{ + int index; + int err; + +again: + if (!ida_pre_get(ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(lock); + err = ida_get_new_above(ida, min_id, &index); + spin_unlock(lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > max_id) { + spin_lock(lock); + ida_remove(ida, index); + spin_unlock(lock); + return -ENOMEM; + } + + return index; +} + +int get_cop_pid(struct mm_struct *mm) +{ + int pid; + + if (mm->context.cop_pid == COP_PID_NONE) { + pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, + &mmu_context_acop_lock); + if (pid >= 0) + mm->context.cop_pid = pid; + } + return mm->context.cop_pid; +} + +int disable_cop_pid(struct mm_struct *mm) +{ + int free_pid = COP_PID_NONE; + + if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { + free_pid = mm->context.cop_pid; + mm->context.cop_pid = COP_PID_NONE; + } + return free_pid; +} + +void free_cop_pid(int free_pid) +{ + spin_lock(&mmu_context_acop_lock); + ida_remove(&cop_ida, free_pid); + spin_unlock(&mmu_context_acop_lock); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 161cefde5c1..6157be2a704 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -65,6 +65,13 @@ phys_addr_t memstart_addr = (phys_addr_t)~0ull; EXPORT_SYMBOL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL(kernstart_addr); + +#ifdef CONFIG_RELOCATABLE_PPC32 +/* Used in __va()/__pa() */ +long long virt_phys_offset; +EXPORT_SYMBOL(virt_phys_offset); +#endif + phys_addr_t lowmem_end_addr; int boot_mapsize; @@ -134,8 +141,7 @@ void __init MMU_init(void) if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII - memblock.memory.cnt = 1; - memblock_analyze(); + memblock_enforce_memory_limit(memblock.memory.regions[0].size); printk(KERN_WARNING "Only using first contiguous memory region"); #else wii_memory_fixups(); @@ -158,7 +164,6 @@ void __init MMU_init(void) #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; memblock_enforce_memory_limit(total_lowmem); - memblock_analyze(); #endif /* CONFIG_HIGHMEM */ } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2dd6bdd31fe..d974b79a306 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -51,6 +51,7 @@ #include <asm/vdso.h> #include <asm/fixmap.h> #include <asm/swiotlb.h> +#include <asm/rtas.h> #include "mmu_decl.h" @@ -199,7 +200,7 @@ void __init do_init_bootmem(void) unsigned long start_pfn, end_pfn; start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); - add_active_range(0, start_pfn, end_pfn); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); } /* Add all physical memory to the bootmem map, mark each area @@ -553,7 +554,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ && defined(CONFIG_HUGETLB_PAGE) if (is_vm_hugetlb_page(vma)) - book3e_hugetlb_preload(vma->vm_mm, address, *ptep); + book3e_hugetlb_preload(vma, address, *ptep); #endif } @@ -585,3 +586,23 @@ static int add_system_ram_resources(void) return 0; } subsys_initcall(add_system_ram_resources); + +#ifdef CONFIG_STRICT_DEVMEM +/* + * devmem_is_allowed(): check to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * Access has to be given to non-kernel-ram areas as well, these contain the + * PCI mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pfn) +{ + if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pfn)) + return 1; + if (page_is_rtas_user_buf(pfn)) + return 1; + return 0; +} +#endif /* CONFIG_STRICT_DEVMEM */ diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 5a783d8e8e8..67a42ed0d2f 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -53,14 +53,6 @@ static inline int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -/* - * Since get_random_int() returns the same value within a 1 jiffy window, - * we will almost always get the same randomisation for the stack and mmap - * region. This will mean the relative distance between stack and mmap will - * be the same. - * - * To avoid this we can shift the randomness by 1 bit. - */ static unsigned long mmap_rnd(void) { unsigned long rnd = 0; @@ -68,11 +60,11 @@ static unsigned long mmap_rnd(void) if (current->flags & PF_RANDOMIZE) { /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(22-PAGE_SHIFT))); + rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); else - rnd = (long)(get_random_int() % (1<<(29-PAGE_SHIFT))); + rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); } - return (rnd << PAGE_SHIFT) * 2; + return rnd << PAGE_SHIFT; } static inline unsigned long mmap_base(void) diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index ca988a3d5fb..40677aa0190 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -24,200 +24,7 @@ #include <asm/mmu_context.h> -#ifdef CONFIG_PPC_ICSWX -/* - * The processor and its L2 cache cause the icswx instruction to - * generate a COP_REQ transaction on PowerBus. The transaction has - * no address, and the processor does not perform an MMU access - * to authenticate the transaction. The command portion of the - * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and - * the coprocessor Process ID (PID), which the coprocessor compares - * to the authorized LPID and PID held in the coprocessor, to determine - * if the process is authorized to generate the transaction. - * The data of the COP_REQ transaction is 128-byte or less and is - * placed in cacheable memory on a 128-byte cache line boundary. - * - * The task to use a coprocessor should use use_cop() to allocate - * a coprocessor PID before executing icswx instruction. use_cop() - * also enables the coprocessor context switching. Drop_cop() is - * used to free the coprocessor PID. - * - * Example: - * Host Fabric Interface (HFI) is a PowerPC network coprocessor. - * Each HFI have multiple windows. Each HFI window serves as a - * network device sending to and receiving from HFI network. - * HFI immediate send function uses icswx instruction. The immediate - * send function allows small (single cache-line) packets be sent - * without using the regular HFI send FIFO and doorbell, which are - * much slower than immediate send. - * - * For each task intending to use HFI immediate send, the HFI driver - * calls use_cop() to obtain a coprocessor PID for the task. - * The HFI driver then allocate a free HFI window and save the - * coprocessor PID to the HFI window to allow the task to use the - * HFI window. - * - * The HFI driver repeatedly creates immediate send packets and - * issues icswx instruction to send data through the HFI window. - * The HFI compares the coprocessor PID in the CPU PID register - * to the PID held in the HFI window to determine if the transaction - * is allowed. - * - * When the task to release the HFI window, the HFI driver calls - * drop_cop() to release the coprocessor PID. - */ - -#define COP_PID_NONE 0 -#define COP_PID_MIN (COP_PID_NONE + 1) -#define COP_PID_MAX (0xFFFF) - -static DEFINE_SPINLOCK(mmu_context_acop_lock); -static DEFINE_IDA(cop_ida); - -void switch_cop(struct mm_struct *next) -{ - mtspr(SPRN_PID, next->context.cop_pid); - mtspr(SPRN_ACOP, next->context.acop); -} - -static int new_cop_pid(struct ida *ida, int min_id, int max_id, - spinlock_t *lock) -{ - int index; - int err; - -again: - if (!ida_pre_get(ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(lock); - err = ida_get_new_above(ida, min_id, &index); - spin_unlock(lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > max_id) { - spin_lock(lock); - ida_remove(ida, index); - spin_unlock(lock); - return -ENOMEM; - } - - return index; -} - -static void sync_cop(void *arg) -{ - struct mm_struct *mm = arg; - - if (mm == current->active_mm) - switch_cop(current->active_mm); -} - -/** - * Start using a coprocessor. - * @acop: mask of coprocessor to be used. - * @mm: The mm the coprocessor to associate with. Most likely current mm. - * - * Return a positive PID if successful. Negative errno otherwise. - * The returned PID will be fed to the coprocessor to determine if an - * icswx transaction is authenticated. - */ -int use_cop(unsigned long acop, struct mm_struct *mm) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return -ENODEV; - - if (!mm || !acop) - return -EINVAL; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - if (mm->context.cop_pid == COP_PID_NONE) { - ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, - &mmu_context_acop_lock); - if (ret < 0) - goto out; - - mm->context.cop_pid = ret; - } - mm->context.acop |= acop; - - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. - */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - - ret = mm->context.cop_pid; - -out: - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(use_cop); - -/** - * Stop using a coprocessor. - * @acop: mask of coprocessor to be stopped. - * @mm: The mm the coprocessor associated with. - */ -void drop_cop(unsigned long acop, struct mm_struct *mm) -{ - int free_pid = COP_PID_NONE; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return; - - if (WARN_ON_ONCE(!mm)) - return; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - mm->context.acop &= ~acop; - - if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { - free_pid = mm->context.cop_pid; - mm->context.cop_pid = COP_PID_NONE; - } - - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. - */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - - if (free_pid != COP_PID_NONE) { - spin_lock(&mmu_context_acop_lock); - ida_remove(&cop_ida, free_pid); - spin_unlock(&mmu_context_acop_lock); - } - - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); -} -EXPORT_SYMBOL_GPL(drop_cop); - -#endif /* CONFIG_PPC_ICSWX */ +#include "icswx.h" static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 261adbd3b55..3feefc3842a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -58,7 +58,7 @@ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. * - * Note: node_to_cpumask() is not valid until after this is done. + * Note: cpumask_of_node() is not valid until after this is done. */ static void __init setup_node_to_cpumask_map(void) { @@ -386,7 +386,7 @@ static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) of_node_put(memory); } -static unsigned long __devinit read_n_cells(int n, const unsigned int **buf) +static unsigned long read_n_cells(int n, const unsigned int **buf) { unsigned long result = 0; @@ -501,7 +501,7 @@ static int of_get_assoc_arrays(struct device_node *memory, aa->n_arrays = *prop++; aa->array_sz = *prop++; - /* Now that we know the number of arrrays and size of each array, + /* Now that we know the number of arrays and size of each array, * revalidate the size of the property read in. */ if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) @@ -690,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory) node_set_online(nid); sz = numa_enforce_memory_limit(base, size); if (sz) - add_active_range(nid, base >> PAGE_SHIFT, - (base >> PAGE_SHIFT) - + (sz >> PAGE_SHIFT)); + memblock_set_node(base, sz, nid); } while (--ranges); } } @@ -782,8 +780,7 @@ new_range: continue; } - add_active_range(nid, start >> PAGE_SHIFT, - (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT)); + memblock_set_node(start, size, nid); if (--ranges) goto new_range; @@ -819,7 +816,8 @@ static void __init setup_nonnuma(void) end_pfn = memblock_region_memory_end_pfn(reg); fake_numa_create_new_node(end_pfn, &nid); - add_active_range(nid, start_pfn, end_pfn); + memblock_set_node(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), nid); node_set_online(nid); } } @@ -949,7 +947,7 @@ static struct notifier_block __cpuinitdata ppc64_numa_nb = { .priority = 1 /* Must run before sched domains notifier. */ }; -static void mark_reserved_regions_for_nid(int nid) +static void __init mark_reserved_regions_for_nid(int nid) { struct pglist_data *node = NODE_DATA(nid); struct memblock_region *reg; @@ -1442,7 +1440,7 @@ int arch_update_cpu_topology(void) { int cpu, nid, old_nid; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; - struct sys_device *sysdev; + struct device *dev; for_each_cpu(cpu,&cpu_associativity_changes_mask) { vphn_get_associativity(cpu, associativity); @@ -1463,9 +1461,9 @@ int arch_update_cpu_topology(void) register_cpu_under_node(cpu, nid); put_online_cpus(); - sysdev = get_cpu_sysdev(cpu); - if (sysdev) - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + if (dev) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index dc4a5f385e4..ff672bd8fea 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -94,11 +94,11 @@ srdi r15,r16,60 /* get region */ rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - bne- dtlb_miss_fault_bolted + bne- dtlb_miss_fault_bolted /* Bail if fault addr is invalid */ rlwinm r10,r11,32-19,27,27 rlwimi r10,r11,32-16,19,19 - cmpwi r15,0 + cmpwi r15,0 /* user vs kernel check */ ori r10,r10,_PAGE_PRESENT oris r11,r10,_PAGE_ACCESSED@h @@ -120,44 +120,38 @@ tlb_miss_common_bolted: rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 cmpldi cr0,r14,0 clrrdi r15,r15,3 - beq tlb_miss_fault_bolted + beq tlb_miss_fault_bolted /* No PGDIR, bail */ BEGIN_MMU_FTR_SECTION /* Set the TLB reservation and search for existing entry. Then load * the entry. */ PPC_TLBSRX_DOT(0,r16) - ldx r14,r14,r15 - beq normal_tlb_miss_done + ldx r14,r14,r15 /* grab pgd entry */ + beq normal_tlb_miss_done /* tlb exists already, bail */ MMU_FTR_SECTION_ELSE - ldx r14,r14,r15 + ldx r14,r14,r15 /* grab pgd entry */ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) #ifndef CONFIG_PPC_64K_PAGES rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 - - cmpldi cr0,r14,0 - beq tlb_miss_fault_bolted - - ldx r14,r14,r15 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ + ldx r14,r14,r15 /* grab pud entry */ #endif /* CONFIG_PPC_64K_PAGES */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 - - cmpldi cr0,r14,0 - beq tlb_miss_fault_bolted - - ldx r14,r14,r15 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted + ldx r14,r14,r15 /* Grab pmd entry */ rldicl r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3 clrrdi r15,r15,3 - - cmpldi cr0,r14,0 - beq tlb_miss_fault_bolted - - ldx r14,r14,r15 + cmpdi cr0,r14,0 + bge tlb_miss_fault_bolted + ldx r14,r14,r15 /* Grab PTE, normal (!huge) page */ /* Check if required permissions are met */ andc. r15,r11,r14 diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 4e13d6f9023..df32a838dcf 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -52,7 +52,7 @@ * indirect page table entries. */ #ifdef CONFIG_PPC_BOOK3E_MMU -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_FSL_BOOK3E struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu) /* limit memory so we dont have linear faults */ memblock_enforce_memory_limit(linear_map_top); - memblock_analyze(); patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); |