Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/gup.c | 18
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 22
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 178
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 183
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-hash64.c | 33
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 192
-rw-r--r-- | arch/powerpc/mm/icswx.c | 2
-rw-r--r-- | arch/powerpc/mm/init_64.c | 3
-rw-r--r-- | arch/powerpc/mm/mem.c | 3
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash64.c | 37
-rw-r--r-- | arch/powerpc/mm/numa.c | 278
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 118
-rw-r--r-- | arch/powerpc/mm/slice.c | 223
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 18
14 files changed, 929 insertions, 379 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index d7efdbf640c..4b921affa49 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -68,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); if (pmd_none(pmd)) return 0; - if (is_hugepd(pmdp)) { + if (pmd_huge(pmd)) { + if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, + write, pages, nr)) + return 0; + } else if (is_hugepd(pmdp)) { if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, addr, next, write, pages, nr)) return 0; @@ -92,7 +96,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (is_hugepd(pudp)) { + if (pud_huge(pud)) { + if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, + write, pages, nr)) + return 0; + } else if (is_hugepd(pudp)) { if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, addr, next, write, pages, nr)) return 0; @@ -153,7 +161,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (is_hugepd(pgdp)) { + if (pgd_huge(pgd)) { + if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, + write, pages, &nr)) + goto slow; + } else if (is_hugepd(pgdp)) { if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, addr, next, write, pages, &nr)) goto slow; diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 7443481a315..0e980acae67 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -196,7 +196,8 @@ htab_insert_pte: mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_4K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(htab_call_hpte_insert1) bl . /* Patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -219,7 +220,8 @@ _GLOBAL(htab_call_hpte_insert1) mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_4K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(htab_call_hpte_insert2) bl . /* Patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -490,7 +492,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) beq htab_inval_old_hpte ld r6,STK_PARAM(R6)(r1) - ori r26,r6,0x8000 /* Load the hidx mask */ + ori r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */ ld r26,0(r26) addi r5,r25,36 /* Check actual HPTE_SUB bit, this */ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */ @@ -515,7 +517,8 @@ htab_special_pfn: mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_4K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(htab_call_hpte_insert1) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -542,7 +545,8 @@ _GLOBAL(htab_call_hpte_insert1) mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_4K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(htab_call_hpte_insert2) bl . 
/* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -607,7 +611,7 @@ htab_pte_insert_ok: sld r4,r4,r5 andc r26,r26,r4 or r26,r26,r3 - ori r5,r6,0x8000 + ori r5,r6,PTE_PAGE_HIDX_OFFSET std r26,0(r5) lwsync std r30,0(r6) @@ -840,7 +844,8 @@ ht64_insert_pte: mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_64K - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_64K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(ht64_call_hpte_insert1) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -863,7 +868,8 @@ _GLOBAL(ht64_call_hpte_insert1) mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_64K - ld r9,STK_PARAM(R9)(r1) /* segment size */ + li r9,MMU_PAGE_64K /* actual page size */ + ld r10,STK_PARAM(R9)(r1) /* segment size */ _GLOBAL(ht64_call_hpte_insert2) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ffc1e00f7a2..6a2aead5b0e 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -39,7 +39,7 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int ssize) +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -61,17 +61,31 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize) switch (psize) { case MMU_PAGE_4K: + /* clear out bits after (52) [0....52.....63] */ + va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; + va |= mmu_psize_defs[apsize].sllp << 6; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; default: /* We need 14 to 14 + i bits of va */ - penc = mmu_psize_defs[psize].penc; - va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); + penc = mmu_psize_defs[psize].penc[apsize]; + va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1); va |= penc << 12; va |= ssize << 8; + /* Add AVAL part */ + if (psize != apsize) { + /* + * MPSS, 64K base page size and 16MB parge page size + * We don't need all the bits, but rest of the bits + * must be ignored by the processor. + * vpn cover upto 65 bits of va. (0...65) and we need + * 58..64 bits of va. 
+ */ + va |= (vpn & 0xfe); + } va |= 1; /* L */ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) @@ -80,7 +94,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize) } } -static inline void __tlbiel(unsigned long vpn, int psize, int ssize) +static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -96,16 +110,30 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize) switch (psize) { case MMU_PAGE_4K: + /* clear out bits after(52) [0....52.....63] */ + va &= ~((1ul << (64 - 52)) - 1); va |= ssize << 8; + va |= mmu_psize_defs[apsize].sllp << 6; asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); break; default: /* We need 14 to 14 + i bits of va */ - penc = mmu_psize_defs[psize].penc; - va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); + penc = mmu_psize_defs[psize].penc[apsize]; + va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1); va |= penc << 12; va |= ssize << 8; + /* Add AVAL part */ + if (psize != apsize) { + /* + * MPSS, 64K base page size and 16MB parge page size + * We don't need all the bits, but rest of the bits + * must be ignored by the processor. + * vpn cover upto 65 bits of va. (0...65) and we need + * 58..64 bits of va. + */ + va |= (vpn & 0xfe); + } va |= 1; /* L */ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" : : "r"(va) : "memory"); @@ -114,7 +142,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize) } -static inline void tlbie(unsigned long vpn, int psize, int ssize, int local) +static inline void tlbie(unsigned long vpn, int psize, int apsize, + int ssize, int local) { unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); @@ -125,10 +154,10 @@ static inline void tlbie(unsigned long vpn, int psize, int ssize, int local) raw_spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { - __tlbiel(vpn, psize, ssize); + __tlbiel(vpn, psize, apsize, ssize); asm volatile("ptesync": : :"memory"); } else { - __tlbie(vpn, psize, ssize); + __tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -156,7 +185,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep) static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, - unsigned long vflags, int psize, int ssize) + unsigned long vflags, int psize, int apsize, int ssize) { struct hash_pte *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; @@ -183,8 +212,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, if (i == HPTES_PER_GROUP) return -1; - hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize) | rflags; + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", @@ -244,6 +273,51 @@ static long native_hpte_remove(unsigned long hpte_group) return i; } +static inline int __hpte_actual_psize(unsigned int lp, int psize) +{ + int i, shift; + unsigned int mask; + + /* start from 1 ignoring MMU_PAGE_4K */ + for (i = 1; i < MMU_PAGE_COUNT; i++) { + + /* invalid penc */ + if (mmu_psize_defs[psize].penc[i] == -1) + continue; + /* + * encoding bits per 
actual page size + * PTE LP actual page size + * rrrr rrrz >=8KB + * rrrr rrzz >=16KB + * rrrr rzzz >=32KB + * rrrr zzzz >=64KB + * ....... + */ + shift = mmu_psize_defs[i].shift - LP_SHIFT; + if (shift > LP_BITS) + shift = LP_BITS; + mask = (1 << shift) - 1; + if ((lp & mask) == mmu_psize_defs[psize].penc[i]) + return i; + } + return -1; +} + +static inline int hpte_actual_psize(struct hash_pte *hptep, int psize) +{ + /* Look at the 8 bit LP value */ + unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + + if (!(hptep->v & HPTE_V_VALID)) + return -1; + + /* First check if it is large page */ + if (!(hptep->v & HPTE_V_LARGE)) + return MMU_PAGE_4K; + + return __hpte_actual_psize(lp, psize); +} + static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int ssize, int local) @@ -251,8 +325,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; int ret = 0; + int actual_psize; - want_v = hpte_encode_v(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); @@ -260,9 +335,13 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); hpte_v = hptep->v; - + actual_psize = hpte_actual_psize(hptep, psize); + if (actual_psize < 0) { + native_unlock_hpte(hptep); + return -1; + } /* Even if we miss, we need to invalidate the TLB */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { + if (!HPTE_V_COMPARE(hpte_v, want_v)) { DBG_LOW(" -> miss\n"); ret = -1; } else { @@ -274,7 +353,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ - tlbie(vpn, psize, ssize, local); + tlbie(vpn, psize, actual_psize, ssize, local); return ret; } @@ -288,7 +367,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) unsigned long want_v, hpte_v; hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_v(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); /* Bolted mappings are only ever in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -315,6 +394,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { + int actual_psize; unsigned long vpn; unsigned long vsid; long slot; @@ -327,13 +407,16 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, if (slot == -1) panic("could not find page to bolt\n"); hptep = htab_address + slot; + actual_psize = hpte_actual_psize(hptep, psize); + if (actual_psize < 0) + return; /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N)); /* Ensure it is out of the tlb too. 
*/ - tlbie(vpn, psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, 0); } static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, @@ -343,64 +426,60 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, unsigned long hpte_v; unsigned long want_v; unsigned long flags; + int actual_psize; local_irq_save(flags); DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); - want_v = hpte_encode_v(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; + actual_psize = hpte_actual_psize(hptep, psize); + if (actual_psize < 0) { + native_unlock_hpte(hptep); + local_irq_restore(flags); + return; + } /* Even if we miss, we need to invalidate the TLB */ - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) + if (!HPTE_V_COMPARE(hpte_v, want_v)) native_unlock_hpte(hptep); else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; /* Invalidate the TLB */ - tlbie(vpn, psize, ssize, local); + tlbie(vpn, psize, actual_psize, ssize, local); local_irq_restore(flags); } -#define LP_SHIFT 12 -#define LP_BITS 8 -#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) - static void hpte_decode(struct hash_pte *hpte, unsigned long slot, - int *psize, int *ssize, unsigned long *vpn) + int *psize, int *apsize, int *ssize, unsigned long *vpn) { unsigned long avpn, pteg, vpi; - unsigned long hpte_r = hpte->r; unsigned long hpte_v = hpte->v; unsigned long vsid, seg_off; - int i, size, shift, penc; + int size, a_size, shift; + /* Look at the 8 bit LP value */ + unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(hpte_v & HPTE_V_LARGE)) - size = MMU_PAGE_4K; - else { - for (i = 0; i < LP_BITS; i++) { - if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1)) - break; - } - penc = LP_MASK(i+1) >> LP_SHIFT; + if (!(hpte_v & HPTE_V_LARGE)) { + size = MMU_PAGE_4K; + a_size = MMU_PAGE_4K; + } else { for (size = 0; size < MMU_PAGE_COUNT; size++) { - /* 4K pages are not represented by LP */ - if (size == MMU_PAGE_4K) - continue; - /* valid entries have a shift value */ if (!mmu_psize_defs[size].shift) continue; - if (penc == mmu_psize_defs[size].penc) + a_size = __hpte_actual_psize(lp, size); + if (a_size != -1) break; } } - /* This works for all page sizes, and for 256M and 1T segments */ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; @@ -433,7 +512,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, default: *vpn = size = 0; } - *psize = size; + *psize = size; + *apsize = a_size; } /* @@ -451,7 +531,7 @@ static void native_hpte_clear(void) struct hash_pte *hptep = htab_address; unsigned long hpte_v; unsigned long pteg_count; - int psize, ssize; + int psize, apsize, ssize; pteg_count = htab_hash_mask + 1; @@ -477,9 +557,9 @@ static void native_hpte_clear(void) * already hold the native_tlbie_lock. 
*/ if (hpte_v & HPTE_V_VALID) { - hpte_decode(hptep, slot, &psize, &ssize, &vpn); + hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); hptep->v = 0; - __tlbie(vpn, psize, ssize); + __tlbie(vpn, psize, apsize, ssize); } } @@ -520,7 +600,7 @@ static void native_flush_hash_range(unsigned long number, int local) slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; hptep = htab_address + slot; - want_v = hpte_encode_v(vpn, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; if (!HPTE_V_COMPARE(hpte_v, want_v) || @@ -540,7 +620,7 @@ static void native_flush_hash_range(unsigned long number, int local) pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { - __tlbiel(vpn, psize, ssize); + __tlbiel(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("ptesync":::"memory"); @@ -557,7 +637,7 @@ static void native_flush_hash_range(unsigned long number, int local) pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { - __tlbie(vpn, psize, ssize); + __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("eieio; tlbsync; ptesync":::"memory"); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f410c3e12c1..3e4c4ed1933 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -126,7 +126,7 @@ static struct mmu_psize_def mmu_psize_defaults_old[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, - .penc = 0, + .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1}, .avpnm = 0, .tlbiel = 0, }, @@ -140,14 +140,15 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, - .penc = 0, + .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1}, .avpnm = 0, .tlbiel = 1, }, [MMU_PAGE_16M] = { .shift = 24, .sllp = SLB_VSID_L, - .penc = 0, + .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0, + [MMU_PAGE_16M + 1 ... 
MMU_PAGE_COUNT - 1] = -1 }, .avpnm = 0x1UL, .tlbiel = 0, }, @@ -209,7 +210,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, BUG_ON(!ppc_md.hpte_insert); ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, - HPTE_V_BOLTED, psize, ssize); + HPTE_V_BOLTED, psize, psize, ssize); if (ret < 0) break; @@ -276,6 +277,30 @@ static void __init htab_init_seg_sizes(void) of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); } +static int __init get_idx_from_shift(unsigned int shift) +{ + int idx = -1; + + switch (shift) { + case 0xc: + idx = MMU_PAGE_4K; + break; + case 0x10: + idx = MMU_PAGE_64K; + break; + case 0x14: + idx = MMU_PAGE_1M; + break; + case 0x18: + idx = MMU_PAGE_16M; + break; + case 0x22: + idx = MMU_PAGE_16G; + break; + } + return idx; +} + static int __init htab_dt_scan_page_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -291,64 +316,65 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, prop = (u32 *)of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); if (prop != NULL) { - DBG("Page sizes from device-tree:\n"); + pr_info("Page sizes from device-tree:\n"); size /= 4; cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); while(size > 0) { - unsigned int shift = prop[0]; + unsigned int base_shift = prop[0]; unsigned int slbenc = prop[1]; unsigned int lpnum = prop[2]; - unsigned int lpenc = 0; struct mmu_psize_def *def; - int idx = -1; + int idx, base_idx; size -= 3; prop += 3; - while(size > 0 && lpnum) { - if (prop[0] == shift) - lpenc = prop[1]; - prop += 2; size -= 2; - lpnum--; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* + * skip the pte encoding also + */ + prop += lpnum * 2; size -= lpnum * 2; + continue; } - switch(shift) { - case 0xc: - idx = MMU_PAGE_4K; - break; - case 0x10: - idx = MMU_PAGE_64K; - break; - case 0x14: - idx = MMU_PAGE_1M; - break; - case 0x18: - idx = MMU_PAGE_16M; + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; - break; - case 0x22: - idx = MMU_PAGE_16G; - break; - } - if (idx < 0) - continue; - def = &mmu_psize_defs[idx]; - def->shift = shift; - if (shift <= 23) + + def->shift = base_shift; + if (base_shift <= 23) def->avpnm = 0; else - def->avpnm = (1 << (shift - 23)) - 1; + def->avpnm = (1 << (base_shift - 23)) - 1; def->sllp = slbenc; - def->penc = lpenc; - /* We don't know for sure what's up with tlbiel, so + /* + * We don't know for sure what's up with tlbiel, so * for now we only set it for 4K and 64K pages */ - if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K) + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) def->tlbiel = 1; else def->tlbiel = 0; - DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, " - "tlbiel=%d, penc=%d\n", - idx, shift, def->sllp, def->avpnm, def->tlbiel, - def->penc); + while (size > 0 && lpnum) { + unsigned int shift = prop[0]; + int penc = prop[1]; + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) + continue; + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); + } } return 1; } @@ -397,10 +423,21 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, } #endif /* CONFIG_HUGETLB_PAGE */ +static void mmu_psize_set_default_penc(void) +{ + int bpsize, apsize; + for (bpsize = 0; bpsize < 
MMU_PAGE_COUNT; bpsize++) + for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++) + mmu_psize_defs[bpsize].penc[apsize] = -1; +} + static void __init htab_init_page_sizes(void) { int rc; + /* se the invalid penc to -1 */ + mmu_psize_set_default_penc(); + /* Default to 4K pages only */ memcpy(mmu_psize_defs, mmu_psize_defaults_old, sizeof(mmu_psize_defaults_old)); @@ -899,14 +936,14 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) void hash_failure_debug(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long trap, - int ssize, int psize, unsigned long pte) + int ssize, int psize, int lpsize, unsigned long pte) { if (!printk_ratelimit()) return; pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", ea, access, current->comm); - pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", - trap, vsid, ssize, psize, pte); + pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n", + trap, vsid, ssize, psize, lpsize, pte); } /* Result code is: @@ -1079,7 +1116,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) */ if (rc == -1) hash_failure_debug(ea, access, vsid, trap, ssize, psize, - pte_val(*ptep)); + psize, pte_val(*ptep)); #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); #else @@ -1157,7 +1194,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, */ if (rc == -1) hash_failure_debug(ea, access, vsid, trap, ssize, - mm->context.user_psize, pte_val(*ptep)); + mm->context.user_psize, + mm->context.user_psize, + pte_val(*ptep)); local_irq_restore(flags); } @@ -1230,24 +1269,60 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) bad_page_fault(regs, address, SIGBUS); } +long hpte_insert_repeating(unsigned long hash, unsigned long vpn, + unsigned long pa, unsigned long rflags, + unsigned long vflags, int psize, int ssize) +{ + unsigned long hpte_group; + long slot; + +repeat: + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags, + psize, psize, ssize); + + /* Primary is full, try the secondary */ + if (unlikely(slot == -1)) { + hpte_group = ((~hash & htab_hash_mask) * + HPTES_PER_GROUP) & ~0x7UL; + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, + vflags | HPTE_V_SECONDARY, + psize, psize, ssize); + if (slot == -1) { + if (mftb() & 0x1) + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP)&~0x7UL; + + ppc_md.hpte_remove(hpte_group); + goto repeat; + } + } + + return slot; +} + #ifdef CONFIG_DEBUG_PAGEALLOC static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) { - unsigned long hash, hpteg; + unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); - int ret; + long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); /* Don't create HPTE entries for bad address */ if (!vsid) return; - ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), - mode, HPTE_V_BOLTED, - mmu_linear_psize, mmu_kernel_ssize); + + ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode, + HPTE_V_BOLTED, + mmu_linear_psize, mmu_kernel_ssize); + BUG_ON (ret < 0); spin_lock(&linear_map_hash_lock); BUG_ON(linear_map_hash_slots[lmi] & 0x80); diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c 
b/arch/powerpc/mm/hugetlbpage-hash64.c index cecad348f60..0f1d94a1fb8 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -14,6 +14,10 @@ #include <asm/cacheflush.h> #include <asm/machdep.h> +extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, + unsigned long pa, unsigned long rlags, + unsigned long vflags, int psize, int ssize); + int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize) @@ -83,14 +87,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (likely(!(old_pte & _PAGE_HASHPTE))) { unsigned long hash = hpt_hash(vpn, shift, ssize); - unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; - /* clear HPTE slot informations in new PTE */ #ifdef CONFIG_PPC_64K_PAGES new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; @@ -101,26 +100,8 @@ repeat: rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED)); - /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - mmu_psize, ssize); - - /* Primary is full, try the secondary */ - if (unlikely(slot == -1)) { - hpte_group = ((~hash & htab_hash_mask) * - HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, - HPTE_V_SECONDARY, - mmu_psize, ssize); - if (slot == -1) { - if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * - HPTES_PER_GROUP)&~0x7UL; - - ppc_md.hpte_remove(hpte_group); - goto repeat; - } - } + slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0, + mmu_psize, ssize); /* * Hypervisor failure. Restore old pte and return -1 @@ -129,7 +110,7 @@ repeat: if (unlikely(slot == -2)) { *ptep = __pte(old_pte); hash_failure_debug(ea, access, vsid, trap, ssize, - mmu_psize, old_pte); + mmu_psize, mmu_psize, old_pte); return -1; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1a6de0a7d8e..237c8e5f264 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -48,30 +48,71 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; #endif -static inline int shift_to_mmu_psize(unsigned int shift) +#define hugepd_none(hpd) ((hpd).pd == 0) + +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * At this point we do the placement change only for BOOK3S 64. This would + * possibly work on other subarchs. 
+ */ + +/* + * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have + * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + */ +int pmd_huge(pmd_t pmd) { - int psize; + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) - if (mmu_psize_defs[psize].shift == shift) - return psize; - return -1; +int pud_huge(pud_t pud) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pud_val(pud) & 0x3) != 0x0); } -static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +int pgd_huge(pgd_t pgd) { - if (mmu_psize_defs[mmu_psize].shift) - return mmu_psize_defs[mmu_psize].shift; - BUG(); + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pgd_val(pgd) & 0x3) != 0x0); +} +#else +int pmd_huge(pmd_t pmd) +{ + return 0; } -#define hugepd_none(hpd) ((hpd).pd == 0) +int pud_huge(pud_t pud) +{ + return 0; +} + +int pgd_huge(pgd_t pgd) +{ + return 0; +} +#endif +/* + * We have 4 cases for pgds and pmds: + * (1) invalid (all zeroes) + * (2) pointer to next table, as normal; bottom 6 bits == 0 + * (3) leaf pte for huge page, bottom two bits != 00 + * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table + */ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) { pgd_t *pg; pud_t *pu; pmd_t *pm; + pte_t *ret_pte; hugepd_t *hpdp = NULL; unsigned pdshift = PGDIR_SHIFT; @@ -79,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift *shift = 0; pg = pgdir + pgd_index(ea); - if (is_hugepd(pg)) { + + if (pgd_huge(*pg)) { + ret_pte = (pte_t *) pg; + goto out; + } else if (is_hugepd(pg)) hpdp = (hugepd_t *)pg; - } else if (!pgd_none(*pg)) { + else if (!pgd_none(*pg)) { pdshift = PUD_SHIFT; pu = pud_offset(pg, ea); - if (is_hugepd(pu)) + + if (pud_huge(*pu)) { + ret_pte = (pte_t *) pu; + goto out; + } else if (is_hugepd(pu)) hpdp = (hugepd_t *)pu; else if (!pud_none(*pu)) { pdshift = PMD_SHIFT; pm = pmd_offset(pu, ea); - if (is_hugepd(pm)) + + if (pmd_huge(*pm)) { + ret_pte = (pte_t *) pm; + goto out; + } else if (is_hugepd(pm)) hpdp = (hugepd_t *)pm; - else if (!pmd_none(*pm)) { + else if (!pmd_none(*pm)) return pte_offset_kernel(pm, ea); - } } } - if (!hpdp) return NULL; + ret_pte = hugepte_offset(hpdp, ea, pdshift); + pdshift = hugepd_shift(*hpdp); +out: if (shift) - *shift = hugepd_shift(*hpdp); - return hugepte_offset(hpdp, ea, pdshift); + *shift = pdshift; + return ret_pte; } EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); @@ -145,6 +199,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (unlikely(!hugepd_none(*hpdp))) break; else + /* We use the old format for PPC_FSL_BOOK3E */ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; } /* If we bailed from the for loop early, an error occurred, clean up */ @@ -156,9 +211,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #else if (!hugepd_none(*hpdp)) kmem_cache_free(cachep, new); - else + else { +#ifdef CONFIG_PPC_BOOK3S_64 + hpdp->pd = (unsigned long)new | + (shift_to_mmu_psize(pshift) << 2); +#else hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; #endif + } +#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -175,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #define HUGEPD_PUD_SHIFT PMD_SHIFT #endif +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * At this point we do the placement change only for BOOK3S 64. 
This would + * possibly work on other subarchs. + */ +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +{ + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + hugepd_t *hpdp = NULL; + unsigned pshift = __ffs(sz); + unsigned pdshift = PGDIR_SHIFT; + + addr &= ~(sz-1); + pg = pgd_offset(mm, addr); + + if (pshift == PGDIR_SHIFT) + /* 16GB huge page */ + return (pte_t *) pg; + else if (pshift > PUD_SHIFT) + /* + * We need to use hugepd table + */ + hpdp = (hugepd_t *)pg; + else { + pdshift = PUD_SHIFT; + pu = pud_alloc(mm, pg, addr); + if (pshift == PUD_SHIFT) + return (pte_t *)pu; + else if (pshift > PMD_SHIFT) + hpdp = (hugepd_t *)pu; + else { + pdshift = PMD_SHIFT; + pm = pmd_alloc(mm, pu, addr); + if (pshift == PMD_SHIFT) + /* 16MB hugepage */ + return (pte_t *)pm; + else + hpdp = (hugepd_t *)pm; + } + } + if (!hpdp) + return NULL; + + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); + + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) + return NULL; + + return hugepte_offset(hpdp, addr, pdshift); +} + +#else + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; @@ -212,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } +#endif #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early @@ -475,7 +592,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (pmd_none(*pmd)) + if (pmd_none_or_clear_bad(pmd)) continue; #ifdef CONFIG_PPC_FSL_BOOK3E /* @@ -628,16 +745,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) return page; } -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -int pud_huge(pud_t pud) -{ - return 0; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -646,8 +753,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) +int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; unsigned long pte_end; @@ -742,7 +849,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif @@ -883,11 +990,16 @@ static int __init hugetlbpage_init(void) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; - - pgtable_cache_add(pdshift - shift, NULL); - if (!PGT_CACHE(pdshift - shift)) - panic("hugetlbpage_init(): could not create " - "pgtable cache for %d bit pagesize\n", shift); + /* + * if we have pdshift and shift value same, we don't + * use pgt cache for hugepd. + */ + if (pdshift != shift) { + pgtable_cache_add(pdshift - shift, NULL); + if (!PGT_CACHE(pdshift - shift)) + panic("hugetlbpage_init(): could not create " + "pgtable cache for %d bit pagesize\n", shift); + } } /* Set default large page size. 
Currently, we pick 16M or 1M diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c index 8cdbd8634a5..915412e4d5b 100644 --- a/arch/powerpc/mm/icswx.c +++ b/arch/powerpc/mm/icswx.c @@ -67,7 +67,7 @@ void switch_cop(struct mm_struct *next) { -#ifdef CONFIG_ICSWX_PID +#ifdef CONFIG_PPC_ICSWX_PID mtspr(SPRN_PID, next->context.cop_pid); #endif mtspr(SPRN_ACOP, next->context.acop); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 5a535b73ea1..c2787bf779c 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -129,8 +129,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); new = kmem_cache_create(name, table_size, align, 0, ctor); - PGT_CACHE(shift) = new; - + pgtable_cache[shift - 1] = new; pr_debug("Allocated pgtable cache for order %d\n", shift); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index cd76c454942..0988a26e041 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -66,10 +66,9 @@ unsigned long long memory_limit; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; +EXPORT_SYMBOL(kmap_pte); pgprot_t kmap_prot; - EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); static inline pte_t *virt_to_kpte(unsigned long vaddr) { diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index d1d1b92c5b9..178876aef40 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <asm/mmu_context.h> +#include <asm/pgalloc.h> #include "icswx.h" @@ -85,6 +86,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) spin_lock_init(mm->context.cop_lockp); #endif /* CONFIG_PPC_ICSWX */ +#ifdef CONFIG_PPC_64K_PAGES + mm->context.pte_frag = NULL; +#endif return 0; } @@ -96,13 +100,46 @@ void __destroy_context(int context_id) } EXPORT_SYMBOL_GPL(__destroy_context); +#ifdef CONFIG_PPC_64K_PAGES +static void destroy_pagetable_page(struct mm_struct *mm) +{ + int count; + void *pte_frag; + struct page *page; + + pte_frag = mm->context.pte_frag; + if (!pte_frag) + return; + + page = virt_to_page(pte_frag); + /* drop all the pending references */ + count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT; + /* We allow PTE_FRAG_NR fragments from a PTE page */ + count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count); + if (!count) { + pgtable_page_dtor(page); + free_hot_cold_page(page, 0); + } +} + +#else +static inline void destroy_pagetable_page(struct mm_struct *mm) +{ + return; +} +#endif + + void destroy_context(struct mm_struct *mm) { + #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); mm->context.cop_lockp = NULL; #endif /* CONFIG_PPC_ICSWX */ + + destroy_pagetable_page(mm); __destroy_context(mm->context.id); subpage_prot_free(mm); mm->context.id = MMU_NO_CONTEXT; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fa33c546e77..88c0425dc0a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -22,6 +22,10 @@ #include <linux/pfn.h> #include <linux/cpuset.h> #include <linux/node.h> +#include <linux/stop_machine.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> #include <linux/slab.h> #include <asm/sparsemem.h> #include <asm/prom.h> @@ -30,6 +34,7 @@ #include <asm/paca.h> #include <asm/hvcall.h> #include <asm/setup.h> +#include <asm/vdso.h> static int numa_enabled = 1; @@ -77,7 +82,7 
@@ static void __init setup_node_to_cpumask_map(void) dbg("Node to cpumask map for %d nodes\n", nr_node_ids); } -static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, +static int __init fake_numa_create_new_node(unsigned long end_pfn, unsigned int *nid) { unsigned long long mem; @@ -199,7 +204,7 @@ int __node_distance(int a, int b) int distance = LOCAL_DISTANCE; if (!form1_affinity) - return distance; + return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); for (i = 0; i < distance_ref_points_depth; i++) { if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) @@ -289,9 +294,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); static int __init find_min_common_depth(void) { int depth; - struct device_node *chosen; struct device_node *root; - const char *vec5; if (firmware_has_feature(FW_FEATURE_OPAL)) root = of_find_node_by_path("/ibm,opal"); @@ -323,24 +326,10 @@ static int __init find_min_common_depth(void) distance_ref_points_depth /= sizeof(int); -#define VEC5_AFFINITY_BYTE 5 -#define VEC5_AFFINITY 0x80 - - if (firmware_has_feature(FW_FEATURE_OPAL)) + if (firmware_has_feature(FW_FEATURE_OPAL) || + firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) { + dbg("Using form 1 affinity\n"); form1_affinity = 1; - else { - chosen = of_find_node_by_path("/chosen"); - if (chosen) { - vec5 = of_get_property(chosen, - "ibm,architecture-vec-5", NULL); - if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & - VEC5_AFFINITY)) { - dbg("Using form 1 affinity\n"); - form1_affinity = 1; - } - - of_node_put(chosen); - } } if (form1_affinity) { @@ -1268,10 +1257,18 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR +struct topology_update_data { + struct topology_update_data *next; + unsigned int cpu; + int old_nid; + int new_nid; +}; + static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; static cpumask_t cpu_associativity_changes_mask; static int vphn_enabled; -static void set_topology_timer(void); +static int prrn_enabled; +static void reset_topology_timer(void); /* * Store the current values of the associativity change counters in the @@ -1307,11 +1304,9 @@ static void setup_cpu_associativity_change_counters(void) */ static int update_cpu_associativity_changes_mask(void) { - int cpu, nr_cpus = 0; + int cpu; cpumask_t *changes = &cpu_associativity_changes_mask; - cpumask_clear(changes); - for_each_possible_cpu(cpu) { int i, changed = 0; u8 *counts = vphn_cpu_change_counts[cpu]; @@ -1325,11 +1320,10 @@ static int update_cpu_associativity_changes_mask(void) } if (changed) { cpumask_set_cpu(cpu, changes); - nr_cpus++; } } - return nr_cpus; + return cpumask_weight(changes); } /* @@ -1421,40 +1415,84 @@ static long vphn_get_associativity(unsigned long cpu, } /* + * Update the CPU maps and sysfs entries for a single CPU when its NUMA + * characteristics change. This function doesn't perform any locking and is + * only safe to call from stop_machine(). + */ +static int update_cpu_topology(void *data) +{ + struct topology_update_data *update; + unsigned long cpu; + + if (!data) + return -EINVAL; + + cpu = get_cpu(); + + for (update = data; update; update = update->next) { + if (cpu != update->cpu) + continue; + + unregister_cpu_under_node(update->cpu, update->old_nid); + unmap_cpu_from_node(update->cpu); + map_cpu_to_node(update->cpu, update->new_nid); + vdso_getcpu_init(); + register_cpu_under_node(update->cpu, update->new_nid); + } + + return 0; +} + +/* * Update the node maps and sysfs entries for each cpu whose home node * has changed. 
Returns 1 when the topology has changed, and 0 otherwise. */ int arch_update_cpu_topology(void) { - int cpu, nid, old_nid, changed = 0; + unsigned int cpu, changed = 0; + struct topology_update_data *updates, *ud; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; + cpumask_t updated_cpus; struct device *dev; + int weight, i = 0; - for_each_cpu(cpu,&cpu_associativity_changes_mask) { + weight = cpumask_weight(&cpu_associativity_changes_mask); + if (!weight) + return 0; + + updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL); + if (!updates) + return 0; + + cpumask_clear(&updated_cpus); + + for_each_cpu(cpu, &cpu_associativity_changes_mask) { + ud = &updates[i++]; + ud->cpu = cpu; vphn_get_associativity(cpu, associativity); - nid = associativity_to_nid(associativity); + ud->new_nid = associativity_to_nid(associativity); - if (nid < 0 || !node_online(nid)) - nid = first_online_node; + if (ud->new_nid < 0 || !node_online(ud->new_nid)) + ud->new_nid = first_online_node; - old_nid = numa_cpu_lookup_table[cpu]; + ud->old_nid = numa_cpu_lookup_table[cpu]; + cpumask_set_cpu(cpu, &updated_cpus); - /* Disable hotplug while we update the cpu - * masks and sysfs. - */ - get_online_cpus(); - unregister_cpu_under_node(cpu, old_nid); - unmap_cpu_from_node(cpu); - map_cpu_to_node(cpu, nid); - register_cpu_under_node(cpu, nid); - put_online_cpus(); - - dev = get_cpu_device(cpu); + if (i < weight) + ud->next = &updates[i]; + } + + stop_machine(update_cpu_topology, &updates[0], &updated_cpus); + + for (ud = &updates[0]; ud; ud = ud->next) { + dev = get_cpu_device(ud->cpu); if (dev) kobject_uevent(&dev->kobj, KOBJ_CHANGE); + cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); changed = 1; } + kfree(updates); return changed; } @@ -1471,49 +1509,165 @@ void topology_schedule_update(void) static void topology_timer_fn(unsigned long ignored) { - if (!vphn_enabled) - return; - if (update_cpu_associativity_changes_mask() > 0) + if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) topology_schedule_update(); - set_topology_timer(); + else if (vphn_enabled) { + if (update_cpu_associativity_changes_mask() > 0) + topology_schedule_update(); + reset_topology_timer(); + } } static struct timer_list topology_timer = TIMER_INITIALIZER(topology_timer_fn, 0, 0); -static void set_topology_timer(void) +static void reset_topology_timer(void) { topology_timer.data = 0; topology_timer.expires = jiffies + 60 * HZ; - add_timer(&topology_timer); + mod_timer(&topology_timer, topology_timer.expires); } +#ifdef CONFIG_SMP + +static void stage_topology_update(int core_id) +{ + cpumask_or(&cpu_associativity_changes_mask, + &cpu_associativity_changes_mask, cpu_sibling_mask(core_id)); + reset_topology_timer(); +} + +static int dt_update_callback(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_prop_reconfig *update; + int rc = NOTIFY_DONE; + + switch (action) { + case OF_RECONFIG_UPDATE_PROPERTY: + update = (struct of_prop_reconfig *)data; + if (!of_prop_cmp(update->dn->type, "cpu") && + !of_prop_cmp(update->prop->name, "ibm,associativity")) { + u32 core_id; + of_property_read_u32(update->dn, "reg", &core_id); + stage_topology_update(core_id); + rc = NOTIFY_OK; + } + break; + } + + return rc; +} + +static struct notifier_block dt_update_nb = { + .notifier_call = dt_update_callback, +}; + +#endif + /* - * Start polling for VPHN associativity changes. + * Start polling for associativity changes. 
*/ int start_topology_update(void) { int rc = 0; - /* Disabled until races with load balancing are fixed */ - if (0 && firmware_has_feature(FW_FEATURE_VPHN) && - get_lppaca()->shared_proc) { - vphn_enabled = 1; - setup_cpu_associativity_change_counters(); - init_timer_deferrable(&topology_timer); - set_topology_timer(); - rc = 1; + if (firmware_has_feature(FW_FEATURE_PRRN)) { + if (!prrn_enabled) { + prrn_enabled = 1; + vphn_enabled = 0; +#ifdef CONFIG_SMP + rc = of_reconfig_notifier_register(&dt_update_nb); +#endif + } + } else if (firmware_has_feature(FW_FEATURE_VPHN) && + get_lppaca()->shared_proc) { + if (!vphn_enabled) { + prrn_enabled = 0; + vphn_enabled = 1; + setup_cpu_associativity_change_counters(); + init_timer_deferrable(&topology_timer); + reset_topology_timer(); + } } return rc; } -__initcall(start_topology_update); /* * Disable polling for VPHN associativity changes. */ int stop_topology_update(void) { - vphn_enabled = 0; - return del_timer_sync(&topology_timer); + int rc = 0; + + if (prrn_enabled) { + prrn_enabled = 0; +#ifdef CONFIG_SMP + rc = of_reconfig_notifier_unregister(&dt_update_nb); +#endif + } else if (vphn_enabled) { + vphn_enabled = 0; + rc = del_timer_sync(&topology_timer); + } + + return rc; +} + +int prrn_is_enabled(void) +{ + return prrn_enabled; +} + +static int topology_read(struct seq_file *file, void *v) +{ + if (vphn_enabled || prrn_enabled) + seq_puts(file, "on\n"); + else + seq_puts(file, "off\n"); + + return 0; +} + +static int topology_open(struct inode *inode, struct file *file) +{ + return single_open(file, topology_read, NULL); +} + +static ssize_t topology_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + char kbuf[4]; /* "on" or "off" plus null. */ + int read_len; + + read_len = count < 3 ? 
count : 3; + if (copy_from_user(kbuf, buf, read_len)) + return -EINVAL; + + kbuf[read_len] = '\0'; + + if (!strncmp(kbuf, "on", 2)) + start_topology_update(); + else if (!strncmp(kbuf, "off", 3)) + stop_topology_update(); + else + return -EINVAL; + + return count; +} + +static const struct file_operations topology_ops = { + .read = seq_read, + .write = topology_write, + .open = topology_open, + .release = single_release +}; + +static int topology_update_init(void) +{ + start_topology_update(); + proc_create("powerpc/topology_updates", 644, NULL, &topology_ops); + + return 0; } +device_initcall(topology_update_init); #endif /* CONFIG_PPC_SPLPAR */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 654258f165a..a854096e102 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -337,3 +337,121 @@ EXPORT_SYMBOL(__ioremap_at); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); + +#ifdef CONFIG_PPC_64K_PAGES +static pte_t *get_from_cache(struct mm_struct *mm) +{ + void *pte_frag, *ret; + + spin_lock(&mm->page_table_lock); + ret = mm->context.pte_frag; + if (ret) { + pte_frag = ret + PTE_FRAG_SIZE; + /* + * If we have taken up all the fragments mark PTE page NULL + */ + if (((unsigned long)pte_frag & ~PAGE_MASK) == 0) + pte_frag = NULL; + mm->context.pte_frag = pte_frag; + } + spin_unlock(&mm->page_table_lock); + return (pte_t *)ret; +} + +static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) +{ + void *ret = NULL; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + if (!page) + return NULL; + + ret = page_address(page); + spin_lock(&mm->page_table_lock); + /* + * If we find pgtable_page set, we return + * the allocated page with single fragement + * count. 
+ */ + if (likely(!mm->context.pte_frag)) { + atomic_set(&page->_count, PTE_FRAG_NR); + mm->context.pte_frag = ret + PTE_FRAG_SIZE; + } + spin_unlock(&mm->page_table_lock); + + if (!kernel) + pgtable_page_ctor(page); + + return (pte_t *)ret; +} + +pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel) +{ + pte_t *pte; + + pte = get_from_cache(mm); + if (pte) + return pte; + + return __alloc_for_cache(mm, kernel); +} + +void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel) +{ + struct page *page = virt_to_page(table); + if (put_page_testzero(page)) { + if (!kernel) + pgtable_page_dtor(page); + free_hot_cold_page(page, 0); + } +} + +#ifdef CONFIG_SMP +static void page_table_free_rcu(void *table) +{ + struct page *page = virt_to_page(table); + if (put_page_testzero(page)) { + pgtable_page_dtor(page); + free_hot_cold_page(page, 0); + } +} + +void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + if (!shift) + /* PTE page needs special handling */ + page_table_free_rcu(table); + else { + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(shift), table); + } +} +#else +void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + if (!shift) { + /* PTE page needs special handling */ + struct page *page = virt_to_page(table); + if (put_page_testzero(page)) { + pgtable_page_dtor(page); + free_hot_cold_page(page, 0); + } + } else { + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(shift), table); + } +} +#endif +#endif /* CONFIG_PPC_64K_PAGES */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index cf9dada734b..3e99c149271 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -237,134 +237,112 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz #endif } +/* + * Compute which slice addr is part of; + * set *boundary_addr to the start or end boundary of that slice + * (depending on 'end' parameter); + * return boolean indicating if the slice is marked as available in the + * 'available' slice_mark. + */ +static bool slice_scan_available(unsigned long addr, + struct slice_mask available, + int end, + unsigned long *boundary_addr) +{ + unsigned long slice; + if (addr < SLICE_LOW_TOP) { + slice = GET_LOW_SLICE_INDEX(addr); + *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; + return !!(available.low_slices & (1u << slice)); + } else { + slice = GET_HIGH_SLICE_INDEX(addr); + *boundary_addr = (slice + end) ? 
+ ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; + return !!(available.high_slices & (1u << slice)); + } +} + static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize, int use_cache) + int psize) { - struct vm_area_struct *vma; - unsigned long start_addr, addr; - struct slice_mask mask; int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); - - if (use_cache) { - if (len <= mm->cached_hole_size) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } else - start_addr = addr = mm->free_area_cache; - } else - start_addr = addr = TASK_UNMAPPED_BASE; - -full_search: - for (;;) { - addr = _ALIGN_UP(addr, 1ul << pshift); - if ((TASK_SIZE - len) < addr) - break; - vma = find_vma(mm, addr); - BUG_ON(vma && (addr >= vma->vm_end)); - - mask = slice_range_to_mask(addr, len); - if (!slice_check_fit(mask, available)) { - if (addr < SLICE_LOW_TOP) - addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT); - else - addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); + unsigned long addr, found, next_end; + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); + info.align_offset = 0; + + addr = TASK_UNMAPPED_BASE; + while (addr < TASK_SIZE) { + info.low_limit = addr; + if (!slice_scan_available(addr, available, 1, &addr)) continue; + + next_slice: + /* + * At this point [info.low_limit; addr) covers + * available slices only and ends at a slice boundary. + * Check if we need to reduce the range, or if we can + * extend it to cover the next available slice. + */ + if (addr >= TASK_SIZE) + addr = TASK_SIZE; + else if (slice_scan_available(addr, available, 1, &next_end)) { + addr = next_end; + goto next_slice; } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - if (use_cache) - mm->free_area_cache = addr + len; - return addr; - } - if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - } + info.high_limit = addr; - /* Make sure we didn't miss any holes */ - if (use_cache && start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; + found = vm_unmapped_area(&info); + if (!(found & ~PAGE_MASK)) + return found; } + return -ENOMEM; } static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize, int use_cache) + int psize) { - struct vm_area_struct *vma; - unsigned long addr; - struct slice_mask mask; int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + unsigned long addr, found, prev; + struct vm_unmapped_area_info info; - /* check if free_area_cache is useful for us */ - if (use_cache) { - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - - /* either no address requested or can't fit in requested - * address hole - */ - addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (addr > len) { - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); - mask = slice_range_to_mask(addr, len); - if (slice_check_fit(mask, available) && - slice_area_is_free(mm, addr, len)) - /* remember the address as a hint for - * next time - */ - return (mm->free_area_cache = addr); - } - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.align_mask = PAGE_MASK & 
((1ul << pshift) - 1); + info.align_offset = 0; addr = mm->mmap_base; - while (addr > len) { - /* Go down by chunk size */ - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); - - /* Check for hit with different page size */ - mask = slice_range_to_mask(addr, len); - if (!slice_check_fit(mask, available)) { - if (addr < SLICE_LOW_TOP) - addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); - else if (addr < (1ul << SLICE_HIGH_SHIFT)) - addr = SLICE_LOW_TOP; - else - addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); + while (addr > PAGE_SIZE) { + info.high_limit = addr; + if (!slice_scan_available(addr - 1, available, 0, &addr)) continue; - } + prev_slice: /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: + * At this point [addr; info.high_limit) covers + * available slices only and starts at a slice boundary. + * Check if we need to reduce the range, or if we can + * extend it to cover the previous available slice. */ - vma = find_vma(mm, addr); - if (!vma || (addr + len) <= vma->vm_start) { - /* remember the address as a hint for next time */ - if (use_cache) - mm->free_area_cache = addr; - return addr; + if (addr < PAGE_SIZE) + addr = PAGE_SIZE; + else if (slice_scan_available(addr - 1, available, 0, &prev)) { + addr = prev; + goto prev_slice; } + info.low_limit = addr; - /* remember the largest hole we saw so far */ - if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start; + found = vm_unmapped_area(&info); + if (!(found & ~PAGE_MASK)) + return found; } /* @@ -373,28 +351,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * can happen with large stack limits and large mmap() * allocations. 
*/ - addr = slice_find_area_bottomup(mm, len, available, psize, 0); - - /* - * Restore the topdown base: - */ - if (use_cache) { - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; - } - - return addr; + return slice_find_area_bottomup(mm, len, available, psize); } static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, struct slice_mask mask, int psize, - int topdown, int use_cache) + int topdown) { if (topdown) - return slice_find_area_topdown(mm, len, mask, psize, use_cache); + return slice_find_area_topdown(mm, len, mask, psize); else - return slice_find_area_bottomup(mm, len, mask, psize, use_cache); + return slice_find_area_bottomup(mm, len, mask, psize); } #define or_mask(dst, src) do { \ @@ -415,7 +383,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, - int topdown, int use_cache) + int topdown) { struct slice_mask mask = {0, 0}; struct slice_mask good_mask; @@ -430,8 +398,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, BUG_ON(mm->task_size == 0); slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); - slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", - addr, len, flags, topdown, use_cache); + slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n", + addr, len, flags, topdown); if (len > mm->task_size) return -ENOMEM; @@ -503,8 +471,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing * slices for that size */ - newaddr = slice_find_area(mm, len, good_mask, psize, topdown, - use_cache); + newaddr = slice_find_area(mm, len, good_mask, psize, topdown); if (newaddr != -ENOMEM) { /* Found within the good mask, we don't have to setup, * we thus return directly @@ -536,8 +503,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * anywhere in the good area. 
*/ if (addr) { - addr = slice_find_area(mm, len, good_mask, psize, topdown, - use_cache); + addr = slice_find_area(mm, len, good_mask, psize, topdown); if (addr != -ENOMEM) { slice_dbg(" found area at 0x%lx\n", addr); return addr; @@ -547,15 +513,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing slices * for that size plus free slices */ - addr = slice_find_area(mm, len, potential_mask, psize, topdown, - use_cache); + addr = slice_find_area(mm, len, potential_mask, psize, topdown); #ifdef CONFIG_PPC_64K_PAGES if (addr == -ENOMEM && psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ or_mask(potential_mask, compat_mask); addr = slice_find_area(mm, len, potential_mask, psize, - topdown, use_cache); + topdown); } #endif @@ -586,8 +551,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long flags) { return slice_get_unmapped_area(addr, len, flags, - current->mm->context.user_psize, - 0, 1); + current->mm->context.user_psize, 0); } unsigned long arch_get_unmapped_area_topdown(struct file *filp, @@ -597,8 +561,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long flags) { return slice_get_unmapped_area(addr0, len, flags, - current->mm->context.user_psize, - 1, 1); + current->mm->context.user_psize, 1); } unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index df32a838dcf..6888cad5103 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -414,9 +414,9 @@ static void setup_page_sizes(void) #ifdef CONFIG_PPC_FSL_BOOK3E unsigned int mmucfg = mfspr(SPRN_MMUCFG); + int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); - if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) && - (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) { + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); unsigned int min_pg, max_pg; @@ -442,6 +442,20 @@ static void setup_page_sizes(void) goto no_indirect; } + + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { + u32 tlb1ps = mfspr(SPRN_TLB1PS); + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (tlb1ps & (1U << (def->shift - 10))) { + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + } + + goto no_indirect; + } #endif tlb0cfg = mfspr(SPRN_TLB0CFG); |