diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 107 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 119 | ||||
-rw-r--r-- | arch/powerpc/mm/lmb.c | 13 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 21 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/subpage-prot.c | 213 |
11 files changed, 445 insertions, 67 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 20629ae95c5..41649a5d360 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8135da06e0a..7b251079926 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -167,10 +167,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (notify_page_fault(regs)) return 0; - if (trap == 0x300) { - if (debugger_fault_handler(regs)) - return 0; - } + if (unlikely(debugger_fault_handler(regs))) + return 0; /* On a kernel SLB miss we can only check for a valid exception entry */ if (!user_mode(regs) && (address >= TASK_SIZE)) @@ -189,7 +187,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, return SIGSEGV; /* in_atomic() in user mode is really bad, as is current->mm == NULL. */ - printk(KERN_EMERG "Page fault in user mode with" + printk(KERN_EMERG "Page fault in user mode with " "in_atomic() = %d mm = %p\n", in_atomic(), mm); printk(KERN_EMERG "NIP = %lx MSR = %lx\n", regs->nip, regs->msr); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 17139daeaff..c93a966b7e4 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -165,15 +165,15 @@ void invalidate_tlbcam_entry(int index) void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, unsigned long cam2) { - settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); + settlbcam(0, PAGE_OFFSET, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); tlbcam_index++; if (cam1) { tlbcam_index++; - settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); + settlbcam(1, PAGE_OFFSET+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); } if (cam2) { tlbcam_index++; - settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); + settlbcam(2, PAGE_OFFSET+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); } } diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e935edd6b72..21d24848647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -331,7 +331,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, int local, int ssize, + * int subpg_prot) */ /* @@ -429,12 +430,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) xor r28,r28,r0 /* hash */ /* Convert linux PTE bits into HW equivalents */ -4: andi. r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ +4: +#ifdef CONFIG_PPC_SUBPAGE_PROT + andc r10,r30,r10 + andi. r3,r10,0x1fe /* Get basic set of flags */ + rlwinm r0,r10,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ +#else + andi. r3,r30,0x1fe /* Get basic set of flags */ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ +#endif + xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ - andc r0,r30,r0 /* r0 = pte & ~r0 */ + andc r0,r3,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f09730bf3a3..32f416175db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -96,6 +96,7 @@ int mmu_vmalloc_psize = MMU_PAGE_4K; int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; +u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; @@ -368,18 +369,11 @@ static void __init htab_init_page_sizes(void) * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - mmu_huge_psize = MMU_PAGE_16M; + set_huge_psize(MMU_PAGE_16M); /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - mmu_huge_psize = MMU_PAGE_1M; - - /* Calculate HPAGE_SHIFT and sanity check it */ - if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT && - mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT) - HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift; - else - HPAGE_SHIFT = 0; /* No huge pages dude ! */ + set_huge_psize(MMU_PAGE_1M); #endif /* CONFIG_HUGETLB_PAGE */ } @@ -477,7 +471,7 @@ void __init htab_initialize(void) unsigned long table; unsigned long pteg_count; unsigned long mode_rw; - unsigned long base = 0, size = 0; + unsigned long base = 0, size = 0, limit; int i; extern unsigned long tce_alloc_start, tce_alloc_end; @@ -511,9 +505,15 @@ void __init htab_initialize(void) _SDR1 = 0; } else { /* Find storage for the HPT. Must be contiguous in - * the absolute address space. + * the absolute address space. On cell we want it to be + * in the first 1 Gig. */ - table = lmb_alloc(htab_size_bytes, htab_size_bytes); + if (machine_is(cell)) + limit = 0x40000000; + else + limit = 0; + + table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -643,7 +643,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) * For now this makes the whole process use 4k pages. */ #ifdef CONFIG_PPC_64K_PAGES -static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) +void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { if (mm->context.user_psize == MMU_PAGE_4K) return; @@ -651,13 +651,62 @@ static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif + if (get_paca()->context.user_psize != MMU_PAGE_4K) { + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } } #endif /* CONFIG_PPC_64K_PAGES */ +#ifdef CONFIG_PPC_SUBPAGE_PROT +/* + * This looks up a 2-bit protection code for a 4k subpage of a 64k page. + * Userspace sets the subpage permissions using the subpage_prot system call. + * + * Result is 0: full permissions, _PAGE_RW: read-only, + * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. + */ +static int subpage_protection(pgd_t *pgdir, unsigned long ea) +{ + struct subpage_prot_table *spt = pgd_subpage_prot(pgdir); + u32 spp = 0; + u32 **sbpm, *sbpp; + + if (ea >= spt->maxaddr) + return 0; + if (ea < 0x100000000) { + /* addresses below 4GB use spt->low_prot */ + sbpm = spt->low_prot; + } else { + sbpm = spt->protptrs[ea >> SBP_L3_SHIFT]; + if (!sbpm) + return 0; + } + sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)]; + if (!sbpp) + return 0; + spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)]; + + /* extract 2-bit bitfield for this 4k subpage */ + spp >>= 30 - 2 * ((ea >> 12) & 0xf); + + /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */ + spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0); + return spp; +} + +#else /* CONFIG_PPC_SUBPAGE_PROT */ +static inline int subpage_protection(pgd_t *pgdir, unsigned long ea) +{ + return 0; +} +#endif + /* Result code is: * 0 - handled * 1 - normal page fault * -1 - critical hash insertion error + * -2 - access not permitted by subpage protection mechanism */ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) { @@ -808,7 +857,14 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize); + { + int spp = subpage_protection(pgdir, ea); + if (access & spp) + rc = -2; + else + rc = __hash_page_4K(ea, access, vsid, ptep, trap, + local, ssize, spp); + } #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); @@ -880,7 +936,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize); + __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, + subpage_protection(pgdir, ea)); local_irq_restore(flags); } @@ -925,19 +982,17 @@ void flush_hash_range(unsigned long number, int local) * low_hash_fault is called when we the low level hash code failed * to instert a PTE due to an hypervisor error */ -void low_hash_fault(struct pt_regs *regs, unsigned long address) +void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) { if (user_mode(regs)) { - siginfo_t info; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return; - } - bad_page_fault(regs, address, SIGBUS); +#ifdef CONFIG_PPC_SUBPAGE_PROT + if (rc == -2) + _exception(SIGSEGV, regs, SEGV_ACCERR, address); + else +#endif + _exception(SIGBUS, regs, BUS_ADRERR, address); + } else + bad_page_fault(regs, address, SIGBUS); } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 71efb38d599..a02266dad21 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,18 +24,17 @@ #include <asm/cputable.h> #include <asm/spu.h> +#define HPAGE_SHIFT_64K 16 +#define HPAGE_SHIFT_16M 24 + #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) -#ifdef CONFIG_PPC_64K_PAGES -#define HUGEPTE_INDEX_SIZE (PMD_SHIFT-HPAGE_SHIFT) -#else -#define HUGEPTE_INDEX_SIZE (PUD_SHIFT-HPAGE_SHIFT) -#endif -#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) -#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << HUGEPTE_INDEX_SIZE) +unsigned int hugepte_shift; +#define PTRS_PER_HUGEPTE (1 << hugepte_shift) +#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) -#define HUGEPD_SHIFT (HPAGE_SHIFT + HUGEPTE_INDEX_SIZE) +#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) #define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) #define HUGEPD_MASK (~(HUGEPD_SIZE-1)) @@ -82,11 +81,35 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return 0; } +/* Base page size affects how we walk hugetlb page tables */ +#ifdef CONFIG_PPC_64K_PAGES +#define hpmd_offset(pud, addr) pmd_offset(pud, addr) +#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) +#else +static inline +pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) +{ + if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + return pmd_offset(pud, addr); + else + return (pmd_t *) pud; +} +static inline +pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +{ + if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + return pmd_alloc(mm, pud, addr); + else + return (pmd_t *) pud; +} +#endif + /* Modelled after find_linux_pte() */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pg; pud_t *pu; + pmd_t *pm; BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); @@ -96,14 +119,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) if (!pgd_none(*pg)) { pu = pud_offset(pg, addr); if (!pud_none(*pu)) { -#ifdef CONFIG_PPC_64K_PAGES - pmd_t *pm; - pm = pmd_offset(pu, addr); + pm = hpmd_offset(pu, addr); if (!pmd_none(*pm)) return hugepte_offset((hugepd_t *)pm, addr); -#else - return hugepte_offset((hugepd_t *)pu, addr); -#endif } } @@ -114,6 +132,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pg; pud_t *pu; + pmd_t *pm; hugepd_t *hpdp = NULL; BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); @@ -124,14 +143,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pu = pud_alloc(mm, pg, addr); if (pu) { -#ifdef CONFIG_PPC_64K_PAGES - pmd_t *pm; - pm = pmd_alloc(mm, pu, addr); + pm = hpmd_alloc(mm, pu, addr); if (pm) hpdp = (hugepd_t *)pm; -#else - hpdp = (hugepd_t *)pu; -#endif } if (! hpdp) @@ -158,7 +172,6 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) PGF_CACHENUM_MASK)); } -#ifdef CONFIG_PPC_64K_PAGES static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -191,7 +204,6 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pud_clear(pud); pmd_free_tlb(tlb, pmd); } -#endif static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long addr, unsigned long end, @@ -210,9 +222,15 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); #else - if (pud_none(*pud)) - continue; - free_hugepte_range(tlb, (hugepd_t *)pud); + if (HPAGE_SHIFT == HPAGE_SHIFT_64K) { + if (pud_none_or_clear_bad(pud)) + continue; + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + } else { + if (pud_none(*pud)) + continue; + free_hugepte_range(tlb, (hugepd_t *)pud); + } #endif } while (pud++, addr = next, addr != end); @@ -526,6 +544,57 @@ repeat: return err; } +void set_huge_psize(int psize) +{ + /* Check that it is a page size supported by the hardware and + * that it fits within pagetable limits. */ + if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT && + (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || + mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) { + HPAGE_SHIFT = mmu_psize_defs[psize].shift; + mmu_huge_psize = psize; +#ifdef CONFIG_PPC_64K_PAGES + hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); +#else + if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); + else + hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT); +#endif + + } else + HPAGE_SHIFT = 0; +} + +static int __init hugepage_setup_sz(char *str) +{ + unsigned long long size; + int mmu_psize = -1; + int shift; + + size = memparse(str, &str); + + shift = __ffs(size); + switch (shift) { +#ifndef CONFIG_PPC_64K_PAGES + case HPAGE_SHIFT_64K: + mmu_psize = MMU_PAGE_64K; + break; +#endif + case HPAGE_SHIFT_16M: + mmu_psize = MMU_PAGE_16M; + break; + } + + if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift) + set_huge_psize(mmu_psize); + else + printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); + + return 1; +} +__setup("hugepagesz=", hugepage_setup_sz); + static void zero_ctor(struct kmem_cache *cache, void *addr) { memset(addr, 0, kmem_cache_size(cache)); diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c index 8f4d2dc4caf..4ce23bcf8a5 100644 --- a/arch/powerpc/mm/lmb.c +++ b/arch/powerpc/mm/lmb.c @@ -342,3 +342,16 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit) } } } + +int __init lmb_is_reserved(unsigned long addr) +{ + int i; + + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long upper = lmb.reserved.region[i].base + + lmb.reserved.region[i].size - 1; + if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) + return 1; + } + return 0; +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5402fb6b3aa..e8122447f01 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -213,15 +213,30 @@ void __init do_init_bootmem(void) */ #ifdef CONFIG_HIGHMEM free_bootmem_with_active_regions(0, total_lowmem >> PAGE_SHIFT); + + /* reserve the sections we're already using */ + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long addr = lmb.reserved.region[i].base + + lmb_size_bytes(&lmb.reserved, i) - 1; + if (addr < total_lowmem) + reserve_bootmem(lmb.reserved.region[i].base, + lmb_size_bytes(&lmb.reserved, i)); + else if (lmb.reserved.region[i].base < total_lowmem) { + unsigned long adjusted_size = total_lowmem - + lmb.reserved.region[i].base; + reserve_bootmem(lmb.reserved.region[i].base, + adjusted_size); + } + } #else free_bootmem_with_active_regions(0, max_pfn); -#endif /* reserve the sections we're already using */ for (i = 0; i < lmb.reserved.cnt; i++) reserve_bootmem(lmb.reserved.region[i].base, lmb_size_bytes(&lmb.reserved, i)); +#endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); @@ -334,11 +349,13 @@ void __init mem_init(void) highmem_mapnr = total_lowmem >> PAGE_SHIFT; for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { struct page *page = pfn_to_page(pfn); - + if (lmb_is_reserved(pfn << PAGE_SHIFT)) + continue; ClearPageReserved(page); init_page_count(page); __free_page(page); totalhigh_pages++; + reservedpages--; } totalram_pages += totalhigh_pages; printk(KERN_DEBUG "High memory: %luk\n", diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 50d7372bc2c..47b06bad24a 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -256,6 +256,7 @@ void slb_initialize(void) static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; extern unsigned int *slb_miss_kernel_load_io; + extern unsigned int *slb_compare_rr_to_size; /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -269,6 +270,8 @@ void slb_initialize(void) SLB_VSID_KERNEL | linear_llp); patch_slb_encoding(slb_miss_kernel_load_io, SLB_VSID_KERNEL | io_llp); + patch_slb_encoding(slb_compare_rr_to_size, + mmu_slb_size); DBG("SLB: linear LLP = %04x\n", linear_llp); DBG("SLB: io LLP = %04x\n", io_llp); diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1328a81a84a..657f6b37e9d 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -227,8 +227,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) 7: ld r10,PACASTABRR(r13) addi r10,r10,1 - /* use a cpu feature mask if we ever change our slb size */ - cmpldi r10,SLB_NUM_ENTRIES + /* This gets soft patched on boot. */ +_GLOBAL(slb_compare_rr_to_size) + cmpldi r10,0 blt+ 4f li r10,SLB_NUM_BOLTED diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c new file mode 100644 index 00000000000..4cafc0c33d0 --- /dev/null +++ b/arch/powerpc/mm/subpage-prot.c @@ -0,0 +1,213 @@ +/* + * Copyright 2007-2008 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlbflush.h> + +/* + * Free all pages allocated for subpage protection maps and pointers. + * Also makes sure that the subpage_prot_table structure is + * reinitialized for the next user. + */ +void subpage_prot_free(pgd_t *pgd) +{ + struct subpage_prot_table *spt = pgd_subpage_prot(pgd); + unsigned long i, j, addr; + u32 **p; + + for (i = 0; i < 4; ++i) { + if (spt->low_prot[i]) { + free_page((unsigned long)spt->low_prot[i]); + spt->low_prot[i] = NULL; + } + } + addr = 0; + for (i = 0; i < 2; ++i) { + p = spt->protptrs[i]; + if (!p) + continue; + spt->protptrs[i] = NULL; + for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr; + ++j, addr += PAGE_SIZE) + if (p[j]) + free_page((unsigned long)p[j]); + free_page((unsigned long)p); + } + spt->maxaddr = 0; +} + +static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, + int npages) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + + pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd)) + return; + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return; + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + arch_enter_lazy_mmu_mode(); + for (; npages > 0; --npages) { + pte_update(mm, addr, pte, 0, 0); + addr += PAGE_SIZE; + ++pte; + } + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(pte - 1, ptl); +} + +/* + * Clear the subpage protection map for an address range, allowing + * all accesses that are allowed by the pte permissions. + */ +static void subpage_prot_clear(unsigned long addr, unsigned long len) +{ + struct mm_struct *mm = current->mm; + struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); + u32 **spm, *spp; + int i, nw; + unsigned long next, limit; + + down_write(&mm->mmap_sem); + limit = addr + len; + if (limit > spt->maxaddr) + limit = spt->maxaddr; + for (; addr < limit; addr = next) { + next = pmd_addr_end(addr, limit); + if (addr < 0x100000000) { + spm = spt->low_prot; + } else { + spm = spt->protptrs[addr >> SBP_L3_SHIFT]; + if (!spm) + continue; + } + spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)]; + if (!spp) + continue; + spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1); + + i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + nw = PTRS_PER_PTE - i; + if (addr + (nw << PAGE_SHIFT) > next) + nw = (next - addr) >> PAGE_SHIFT; + + memset(spp, 0, nw * sizeof(u32)); + + /* now flush any existing HPTEs for the range */ + hpte_flush_range(mm, addr, nw); + } + up_write(&mm->mmap_sem); +} + +/* + * Copy in a subpage protection map for an address range. + * The map has 2 bits per 4k subpage, so 32 bits per 64k page. + * Each 2-bit field is 0 to allow any access, 1 to prevent writes, + * 2 or 3 to prevent all accesses. + * Note that the normal page protections also apply; the subpage + * protection mechanism is an additional constraint, so putting 0 + * in a 2-bit field won't allow writes to a page that is otherwise + * write-protected. + */ +long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) +{ + struct mm_struct *mm = current->mm; + struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); + u32 **spm, *spp; + int i, nw; + unsigned long next, limit; + int err; + + /* Check parameters */ + if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || + addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE) + return -EINVAL; + + if (is_hugepage_only_range(mm, addr, len)) + return -EINVAL; + + if (!map) { + /* Clear out the protection map for the address range */ + subpage_prot_clear(addr, len); + return 0; + } + + if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32))) + return -EFAULT; + + down_write(&mm->mmap_sem); + for (limit = addr + len; addr < limit; addr = next) { + next = pmd_addr_end(addr, limit); + err = -ENOMEM; + if (addr < 0x100000000) { + spm = spt->low_prot; + } else { + spm = spt->protptrs[addr >> SBP_L3_SHIFT]; + if (!spm) { + spm = (u32 **)get_zeroed_page(GFP_KERNEL); + if (!spm) + goto out; + spt->protptrs[addr >> SBP_L3_SHIFT] = spm; + } + } + spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1); + spp = *spm; + if (!spp) { + spp = (u32 *)get_zeroed_page(GFP_KERNEL); + if (!spp) + goto out; + *spm = spp; + } + spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1); + + local_irq_disable(); + demote_segment_4k(mm, addr); + local_irq_enable(); + + i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + nw = PTRS_PER_PTE - i; + if (addr + (nw << PAGE_SHIFT) > next) + nw = (next - addr) >> PAGE_SHIFT; + + up_write(&mm->mmap_sem); + err = -EFAULT; + if (__copy_from_user(spp, map, nw * sizeof(u32))) + goto out2; + map += nw; + down_write(&mm->mmap_sem); + + /* now flush any existing HPTEs for the range */ + hpte_flush_range(mm, addr, nw); + } + if (limit > spt->maxaddr) + spt->maxaddr = limit; + err = 0; + out: + up_write(&mm->mmap_sem); + out2: + return err; +} |