diff options
Diffstat (limited to 'arch/ppc64/mm')
-rw-r--r-- | arch/ppc64/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/ppc64/mm/hash_low.S | 8 | ||||
-rw-r--r-- | arch/ppc64/mm/hash_native.c | 164 | ||||
-rw-r--r-- | arch/ppc64/mm/hash_utils.c | 22 | ||||
-rw-r--r-- | arch/ppc64/mm/hugetlbpage.c | 230 | ||||
-rw-r--r-- | arch/ppc64/mm/imalloc.c | 20 | ||||
-rw-r--r-- | arch/ppc64/mm/init.c | 122 | ||||
-rw-r--r-- | arch/ppc64/mm/numa.c | 10 | ||||
-rw-r--r-- | arch/ppc64/mm/stab.c | 35 |
9 files changed, 236 insertions, 377 deletions
diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile index ac522d57b2a..3695d00d347 100644 --- a/arch/ppc64/mm/Makefile +++ b/arch/ppc64/mm/Makefile @@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ slb_low.o slb.o stab.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S index c23d46956dd..fbff24827ae 100644 --- a/arch/ppc64/mm/hash_low.S +++ b/arch/ppc64/mm/hash_low.S @@ -170,9 +170,7 @@ htab_insert_pte: /* Call ppc_md.hpte_insert */ ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ mr r4,r29 /* Retreive va */ - li r6,0 /* primary slot */ - li r8,0 /* not bolted and not large */ - li r9,0 + li r6,0 /* no vflags */ _GLOBAL(htab_call_hpte_insert1) bl . /* Will be patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -192,9 +190,7 @@ _GLOBAL(htab_call_hpte_insert1) /* Call ppc_md.hpte_insert */ ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ mr r4,r29 /* Retreive va */ - li r6,1 /* secondary slot */ - li r8,0 /* not bolted and not large */ - li r9,0 + li r6,HPTE_V_SECONDARY@l /* secondary slot */ _GLOBAL(htab_call_hpte_insert2) bl . /* Will be patched by htab_finish_init() */ cmpdi 0,r3,0 diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 52b6b930534..a6abd3a979b 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -27,9 +27,9 @@ static DEFINE_SPINLOCK(native_tlbie_lock); -static inline void native_lock_hpte(HPTE *hptep) +static inline void native_lock_hpte(hpte_t *hptep) { - unsigned long *word = &hptep->dw0.dword0; + unsigned long *word = &hptep->v; while (1) { if (!test_and_set_bit(HPTE_LOCK_BIT, word)) @@ -39,32 +39,28 @@ static inline void native_lock_hpte(HPTE *hptep) } } -static inline void native_unlock_hpte(HPTE *hptep) +static inline void native_unlock_hpte(hpte_t *hptep) { - unsigned long *word = &hptep->dw0.dword0; + unsigned long *word = &hptep->v; asm volatile("lwsync":::"memory"); clear_bit(HPTE_LOCK_BIT, word); } long native_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, int secondary, - unsigned long hpteflags, int bolted, int large) + unsigned long prpn, unsigned long vflags, + unsigned long rflags) { unsigned long arpn = physRpn_to_absRpn(prpn); - HPTE *hptep = htab_address + hpte_group; - Hpte_dword0 dw0; - HPTE lhpte; + hpte_t *hptep = htab_address + hpte_group; + unsigned long hpte_v, hpte_r; int i; for (i = 0; i < HPTES_PER_GROUP; i++) { - dw0 = hptep->dw0.dw0; - - if (!dw0.v) { + if (! (hptep->v & HPTE_V_VALID)) { /* retry with lock held */ native_lock_hpte(hptep); - dw0 = hptep->dw0.dw0; - if (!dw0.v) + if (! (hptep->v & HPTE_V_VALID)) break; native_unlock_hpte(hptep); } @@ -75,56 +71,45 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, if (i == HPTES_PER_GROUP) return -1; - lhpte.dw1.dword1 = 0; - lhpte.dw1.dw1.rpn = arpn; - lhpte.dw1.flags.flags = hpteflags; - - lhpte.dw0.dword0 = 0; - lhpte.dw0.dw0.avpn = va >> 23; - lhpte.dw0.dw0.h = secondary; - lhpte.dw0.dw0.bolted = bolted; - lhpte.dw0.dw0.v = 1; - - if (large) { - lhpte.dw0.dw0.l = 1; - lhpte.dw0.dw0.avpn &= ~0x1UL; - } - - hptep->dw1.dword1 = lhpte.dw1.dword1; + hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; + if (vflags & HPTE_V_LARGE) + va &= ~(1UL << HPTE_V_AVPN_SHIFT); + hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + hptep->r = hpte_r; /* Guarantee the second dword is visible before the valid bit */ __asm__ __volatile__ ("eieio" : : : "memory"); - /* * Now set the first dword including the valid bit * NOTE: this also unlocks the hpte */ - hptep->dw0.dword0 = lhpte.dw0.dword0; + hptep->v = hpte_v; __asm__ __volatile__ ("ptesync" : : : "memory"); - return i | (secondary << 3); + return i | (!!(vflags & HPTE_V_SECONDARY) << 3); } static long native_hpte_remove(unsigned long hpte_group) { - HPTE *hptep; - Hpte_dword0 dw0; + hpte_t *hptep; int i; int slot_offset; + unsigned long hpte_v; /* pick a random entry to start at */ slot_offset = mftb() & 0x7; for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + hpte_group + slot_offset; - dw0 = hptep->dw0.dw0; + hpte_v = hptep->v; - if (dw0.v && !dw0.bolted) { + if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) { /* retry with lock held */ native_lock_hpte(hptep); - dw0 = hptep->dw0.dw0; - if (dw0.v && !dw0.bolted) + hpte_v = hptep->v; + if ((hpte_v & HPTE_V_VALID) + && !(hpte_v & HPTE_V_BOLTED)) break; native_unlock_hpte(hptep); } @@ -137,15 +122,15 @@ static long native_hpte_remove(unsigned long hpte_group) return -1; /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->dw0.dword0 = 0; + hptep->v = 0; return i; } -static inline void set_pp_bit(unsigned long pp, HPTE *addr) +static inline void set_pp_bit(unsigned long pp, hpte_t *addr) { unsigned long old; - unsigned long *p = &addr->dw1.dword1; + unsigned long *p = &addr->r; __asm__ __volatile__( "1: ldarx %0,0,%3\n\ @@ -163,11 +148,11 @@ static inline void set_pp_bit(unsigned long pp, HPTE *addr) */ static long native_hpte_find(unsigned long vpn) { - HPTE *hptep; + hpte_t *hptep; unsigned long hash; unsigned long i, j; long slot; - Hpte_dword0 dw0; + unsigned long hpte_v; hash = hpt_hash(vpn, 0); @@ -175,10 +160,11 @@ static long native_hpte_find(unsigned long vpn) slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; - dw0 = hptep->dw0.dw0; + hpte_v = hptep->v; - if ((dw0.avpn == (vpn >> 11)) && dw0.v && - (dw0.h == j)) { + if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + && (hpte_v & HPTE_V_VALID) + && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { /* HPTE matches */ if (j) slot = -slot; @@ -195,20 +181,21 @@ static long native_hpte_find(unsigned long vpn) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long va, int large, int local) { - HPTE *hptep = htab_address + slot; - Hpte_dword0 dw0; + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; unsigned long avpn = va >> 23; int ret = 0; if (large) - avpn &= ~0x1UL; + avpn &= ~1; native_lock_hpte(hptep); - dw0 = hptep->dw0.dw0; + hpte_v = hptep->v; /* Even if we miss, we need to invalidate the TLB */ - if ((dw0.avpn != avpn) || !dw0.v) { + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { native_unlock_hpte(hptep); ret = -1; } else { @@ -244,7 +231,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) { unsigned long vsid, va, vpn, flags = 0; long slot; - HPTE *hptep; + hpte_t *hptep; int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); vsid = get_kernel_vsid(ea); @@ -269,26 +256,27 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) static void native_hpte_invalidate(unsigned long slot, unsigned long va, int large, int local) { - HPTE *hptep = htab_address + slot; - Hpte_dword0 dw0; + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v; unsigned long avpn = va >> 23; unsigned long flags; int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); if (large) - avpn &= ~0x1UL; + avpn &= ~1; local_irq_save(flags); native_lock_hpte(hptep); - dw0 = hptep->dw0.dw0; + hpte_v = hptep->v; /* Even if we miss, we need to invalidate the TLB */ - if ((dw0.avpn != avpn) || !dw0.v) { + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { native_unlock_hpte(hptep); } else { /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->dw0.dword0 = 0; + hptep->v = 0; } /* Invalidate the tlb */ @@ -304,13 +292,57 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, local_irq_restore(flags); } +/* + * clear all mappings on kexec. All cpus are in real mode (or they will + * be when they isi), and we are the only one left. We rely on our kernel + * mapping being 0xC0's and the hardware ignoring those two real bits. + * + * TODO: add batching support when enabled. remember, no dynamic memory here, + * athough there is the control page available... + */ +static void native_hpte_clear(void) +{ + unsigned long slot, slots, flags; + hpte_t *hptep = htab_address; + unsigned long hpte_v; + unsigned long pteg_count; + + pteg_count = htab_hash_mask + 1; + + local_irq_save(flags); + + /* we take the tlbie lock and hold it. Some hardware will + * deadlock if we try to tlbie from two processors at once. + */ + spin_lock(&native_tlbie_lock); + + slots = pteg_count * HPTES_PER_GROUP; + + for (slot = 0; slot < slots; slot++, hptep++) { + /* + * we could lock the pte here, but we are the only cpu + * running, right? and for crash dump, we probably + * don't want to wait for a maybe bad cpu. + */ + hpte_v = hptep->v; + + if (hpte_v & HPTE_V_VALID) { + hptep->v = 0; + tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); + } + } + + spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); +} + static void native_flush_hash_range(unsigned long context, unsigned long number, int local) { unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn; int i, j; - HPTE *hptep; - Hpte_dword0 dw0; + hpte_t *hptep; + unsigned long hpte_v; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); /* XXX fix for large ptes */ @@ -346,14 +378,15 @@ static void native_flush_hash_range(unsigned long context, native_lock_hpte(hptep); - dw0 = hptep->dw0.dw0; + hpte_v = hptep->v; /* Even if we miss, we need to invalidate the TLB */ - if ((dw0.avpn != avpn) || !dw0.v) { + if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) + || !(hpte_v & HPTE_V_VALID)) { native_unlock_hpte(hptep); } else { /* Invalidate the hpte. NOTE: this also unlocks it */ - hptep->dw0.dword0 = 0; + hptep->v = 0; } j++; @@ -415,7 +448,8 @@ void hpte_init_native(void) ppc_md.hpte_updatepp = native_hpte_updatepp; ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_clear_all = native_hpte_clear; if (tlb_batching_enabled()) ppc_md.flush_hash_range = native_flush_hash_range; htab_finish_init(); diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 0a0f97008d0..623b5d130c3 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -75,8 +75,8 @@ extern unsigned long dart_tablebase; #endif /* CONFIG_U3_DART */ -HPTE *htab_address; -unsigned long htab_hash_mask; +hpte_t *htab_address; +unsigned long htab_hash_mask; extern unsigned long _SDR1; @@ -97,11 +97,15 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end, unsigned long addr; unsigned int step; unsigned long tmp_mode; + unsigned long vflags; - if (large) + if (large) { step = 16*MB; - else + vflags = HPTE_V_BOLTED | HPTE_V_LARGE; + } else { step = 4*KB; + vflags = HPTE_V_BOLTED; + } for (addr = start; addr < end; addr += step) { unsigned long vpn, hash, hpteg; @@ -129,12 +133,12 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end, if (systemcfg->platform & PLATFORM_LPAR) ret = pSeries_lpar_hpte_insert(hpteg, va, virt_to_abs(addr) >> PAGE_SHIFT, - 0, tmp_mode, 1, large); + vflags, tmp_mode); else #endif /* CONFIG_PPC_PSERIES */ ret = native_hpte_insert(hpteg, va, virt_to_abs(addr) >> PAGE_SHIFT, - 0, tmp_mode, 1, large); + vflags, tmp_mode); if (ret == -1) { ppc64_terminate_msg(0x20, "create_pte_mapping"); @@ -195,7 +199,7 @@ void __init htab_initialize(void) memset((void *)table, 0, htab_size_bytes); } - mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX; + mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX; /* On U3 based machines, we need to reserve the DART area and * _NOT_ map it to avoid cache paradoxes as it's remapped non @@ -310,10 +314,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) vsid = get_vsid(mm->context.id, ea); break; - case IO_REGION_ID: - mm = &ioremap_mm; - vsid = get_kernel_vsid(ea); - break; case VMALLOC_REGION_ID: mm = &init_mm; vsid = get_kernel_vsid(ea); diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index d3bf86a5c1a..f9524602818 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr return hugepte_offset(dir, addr); } -static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pud_t *pud; @@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return hugepte_offset(pud, addr); } -static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pud_t *pud; @@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return hugepte_alloc(mm, pud, addr); } -static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, struct page *page, - pte_t *ptep, int write_access) -{ - pte_t entry; - - add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); - if (write_access) { - entry = - pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); - } else { - entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); - } - entry = pte_mkyoung(entry); - entry = pte_mkhuge(entry); - - set_pte_at(mm, addr, ptep, entry); -} - /* * This function checks for proper alignment of input addr and len parameters. */ @@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len) return -EINVAL; } -int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) -{ - pte_t *src_pte, *dst_pte, entry; - struct page *ptepage; - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - int err = -ENOMEM; - - while (addr < end) { - dst_pte = huge_pte_alloc(dst, addr); - if (!dst_pte) - goto out; - - src_pte = huge_pte_offset(src, addr); - entry = *src_pte; - - ptepage = pte_page(entry); - get_page(ptepage); - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); - set_pte_at(dst, addr, dst_pte, entry); - - addr += HPAGE_SIZE; - } - - err = 0; - out: - return err; -} - -int -follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, - struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i) -{ - unsigned long vpfn, vaddr = *position; - int remainder = *length; - - WARN_ON(!is_vm_hugetlb_page(vma)); - - vpfn = vaddr/PAGE_SIZE; - while (vaddr < vma->vm_end && remainder) { - if (pages) { - pte_t *pte; - struct page *page; - - pte = huge_pte_offset(mm, vaddr); - - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); - - page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; - - WARN_ON(!PageCompound(page)); - - get_page(page); - pages[i] = page; - } - - if (vmas) - vmas[i] = vma; - - vaddr += PAGE_SIZE; - ++vpfn; - --remainder; - ++i; - } - - *length = remainder; - *position = vaddr; - - return i; -} - struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { @@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -void unmap_hugepage_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long addr; - pte_t *ptep; - struct page *page; - - WARN_ON(!is_vm_hugetlb_page(vma)); - BUG_ON((start % HPAGE_SIZE) != 0); - BUG_ON((end % HPAGE_SIZE) != 0); - - for (addr = start; addr < end; addr += HPAGE_SIZE) { - pte_t pte; - - ptep = huge_pte_offset(mm, addr); - if (!ptep || pte_none(*ptep)) - continue; - - pte = *ptep; - page = pte_page(pte); - pte_clear(mm, addr, ptep); - - put_page(page); - } - add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); - flush_tlb_pending(); -} - -int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) -{ - struct mm_struct *mm = current->mm; - unsigned long addr; - int ret = 0; - - WARN_ON(!is_vm_hugetlb_page(vma)); - BUG_ON((vma->vm_start % HPAGE_SIZE) != 0); - BUG_ON((vma->vm_end % HPAGE_SIZE) != 0); - - spin_lock(&mm->page_table_lock); - for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { - unsigned long idx; - pte_t *pte = huge_pte_alloc(mm, addr); - struct page *page; - - if (!pte) { - ret = -ENOMEM; - goto out; - } - if (! pte_none(*pte)) - continue; - - idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) - + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); - page = find_get_page(mapping, idx); - if (!page) { - /* charge the fs quota first */ - if (hugetlb_get_quota(mapping)) { - ret = -ENOMEM; - goto out; - } - page = alloc_huge_page(); - if (!page) { - hugetlb_put_quota(mapping); - ret = -ENOMEM; - goto out; - } - ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); - if (! ret) { - unlock_page(page); - } else { - hugetlb_put_quota(mapping); - free_huge_page(page); - goto out; - } - } - set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE); - } -out: - spin_unlock(&mm->page_table_lock); - return ret; -} - /* Because we have an exclusive hugepage region which lies within the * normal user address space, we have to take special measures to make * non-huge mmap()s evade the hugepage reserved regions. */ @@ -468,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, && !is_hugepage_only_range(mm, addr,len)) return addr; } - start_addr = addr = mm->free_area_cache; + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } full_search: vma = find_vma(mm, addr); @@ -492,6 +321,8 @@ full_search: mm->free_area_cache = addr + len; return addr; } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; vma = vma->vm_next; } @@ -499,6 +330,7 @@ full_search: /* Make sure we didn't miss any holes */ if (start_addr != TASK_UNMAPPED_BASE) { start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; @@ -520,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct vm_area_struct *vma, *prev_vma; struct mm_struct *mm = current->mm; unsigned long base = mm->mmap_base, addr = addr0; + unsigned long largest_hole = mm->cached_hole_size; int first_time = 1; /* requested length too big for entire address space */ @@ -540,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } + if (len <= largest_hole) { + largest_hole = 0; + mm->free_area_cache = base; + } try_again: /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) @@ -568,13 +405,21 @@ hugepage_recheck: * vma->vm_start, use it: */ if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) + (!prev_vma || (addr >= prev_vma->vm_end))) { /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); - else + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } else { /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) + if (mm->free_area_cache == vma->vm_end) { mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; + } + } + + /* remember the largest hole we saw so far */ + if (addr + largest_hole < vma->vm_start) + largest_hole = vma->vm_start - addr; /* try just below the current vma->vm_start */ addr = vma->vm_start-len; @@ -587,6 +432,7 @@ fail: */ if (first_time) { mm->free_area_cache = base; + largest_hole = 0; first_time = 0; goto try_again; } @@ -597,11 +443,13 @@ fail: * allocations. */ mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; return addr; } @@ -735,7 +583,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, pte_t *ptep; unsigned long va, vpn; pte_t old_pte, new_pte; - unsigned long hpteflags, prpn; + unsigned long rflags, prpn; long slot; int err = 1; @@ -778,9 +626,9 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, old_pte = *ptep; new_pte = old_pte; - hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ - hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); /* Check if pte already has an hpte (case 2) */ if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { @@ -793,7 +641,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; - if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1) + if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; } @@ -813,10 +661,10 @@ repeat: /* Add in WIMG bits */ /* XXX We should store these in the pte */ - hpteflags |= _PAGE_COHERENT; + rflags |= _PAGE_COHERENT; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0, - hpteflags, 0, 1); + slot = ppc_md.hpte_insert(hpte_group, va, prpn, + HPTE_V_LARGE, rflags); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -824,7 +672,7 @@ repeat: hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, prpn, - 1, hpteflags, 0, 1); + HPTE_V_LARGE, rflags); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index cb8727f3267..b6e75b891ac 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -15,6 +15,7 @@ #include <asm/pgtable.h> #include <asm/semaphore.h> #include <asm/imalloc.h> +#include <asm/cacheflush.h> static DECLARE_MUTEX(imlist_sem); struct vm_struct * imlist = NULL; @@ -285,29 +286,32 @@ struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, return area; } -unsigned long im_free(void * addr) +void im_free(void * addr) { struct vm_struct **p, *tmp; - unsigned long ret_size = 0; if (!addr) - return ret_size; - if ((PAGE_SIZE-1) & (unsigned long) addr) { + return; + if ((unsigned long) addr & ~PAGE_MASK) { printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); - return ret_size; + return; } down(&imlist_sem); for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { - ret_size = tmp->size; *p = tmp->next; + + /* XXX: do we need the lock? */ + spin_lock(&init_mm.page_table_lock); + unmap_vm_area(tmp); + spin_unlock(&init_mm.page_table_lock); + kfree(tmp); up(&imlist_sem); - return ret_size; + return; } } up(&imlist_sem); printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, addr); - return ret_size; } diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 4b42aff74d7..e58a24d4287 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -73,9 +73,6 @@ static unsigned long phbs_io_bot = PHBS_IO_BASE; extern pgd_t swapper_pg_dir[]; extern struct task_struct *current_set[NR_CPUS]; -extern pgd_t ioremap_dir[]; -pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir; - unsigned long klimit = (unsigned long)_end; unsigned long _SDR1=0; @@ -101,7 +98,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageReserved(page)) reserved++; @@ -137,69 +134,6 @@ void iounmap(volatile void __iomem *addr) #else -static void unmap_im_area_pte(pmd_t *pmd, unsigned long addr, - unsigned long end) -{ - pte_t *pte; - - pte = pte_offset_kernel(pmd, addr); - do { - pte_t ptent = ptep_get_and_clear(&ioremap_mm, addr, pte); - WARN_ON(!pte_none(ptent) && !pte_present(ptent)); - } while (pte++, addr += PAGE_SIZE, addr != end); -} - -static inline void unmap_im_area_pmd(pud_t *pud, unsigned long addr, - unsigned long end) -{ - pmd_t *pmd; - unsigned long next; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - unmap_im_area_pte(pmd, addr, next); - } while (pmd++, addr = next, addr != end); -} - -static inline void unmap_im_area_pud(pgd_t *pgd, unsigned long addr, - unsigned long end) -{ - pud_t *pud; - unsigned long next; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - unmap_im_area_pmd(pud, addr, next); - } while (pud++, addr = next, addr != end); -} - -static void unmap_im_area(unsigned long addr, unsigned long end) -{ - struct mm_struct *mm = &ioremap_mm; - unsigned long next; - pgd_t *pgd; - - spin_lock(&mm->page_table_lock); - - pgd = pgd_offset_i(addr); - flush_cache_vunmap(addr, end); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - unmap_im_area_pud(pgd, addr, next); - } while (pgd++, addr = next, addr != end); - flush_tlb_kernel_range(start, end); - - spin_unlock(&mm->page_table_lock); -} - /* * map_io_page currently only called by __ioremap * map_io_page adds an entry to the ioremap page table @@ -214,21 +148,21 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) unsigned long vsid; if (mem_init_done) { - spin_lock(&ioremap_mm.page_table_lock); - pgdp = pgd_offset_i(ea); - pudp = pud_alloc(&ioremap_mm, pgdp, ea); + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); if (!pudp) return -ENOMEM; - pmdp = pmd_alloc(&ioremap_mm, pudp, ea); + pmdp = pmd_alloc(&init_mm, pudp, ea); if (!pmdp) return -ENOMEM; - ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea); + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); if (!ptep) return -ENOMEM; pa = abs_to_phys(pa); - set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); - spin_unlock(&ioremap_mm.page_table_lock); + spin_unlock(&init_mm.page_table_lock); } else { unsigned long va, vpn, hash, hpteg; @@ -246,9 +180,10 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); /* Panic if a pte grpup is full */ - if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0, - _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX, - 1, 0) == -1) { + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { panic("map_io_page: could not insert mapping"); } } @@ -267,13 +202,9 @@ static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, for (i = 0; i < size; i += PAGE_SIZE) if (map_io_page(ea+i, pa+i, flags)) - goto failure; + return NULL; return (void __iomem *) (ea + (addr & ~PAGE_MASK)); - failure: - if (mem_init_done) - unmap_im_area(ea, ea + size); - return NULL; } @@ -381,19 +312,14 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea, */ void iounmap(volatile void __iomem *token) { - unsigned long address, size; void *addr; if (!mem_init_done) return; addr = (void *) ((unsigned long __force) token & PAGE_MASK); - - if ((size = im_free(addr)) == 0) - return; - address = (unsigned long)addr; - unmap_im_area(address, address + size); + im_free(addr); } static int iounmap_subset_regions(unsigned long addr, unsigned long size) @@ -606,7 +532,7 @@ EXPORT_SYMBOL(page_is_ram); * Initialize the bootmem system and give it all the memory we * have available. */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { unsigned long i; @@ -628,12 +554,20 @@ void __init do_init_bootmem(void) max_pfn = max_low_pfn; - /* add all physical memory to the bootmem map. Also find the first */ + /* Add all physical memory to the bootmem map, mark each area + * present. + */ for (i=0; i < lmb.memory.cnt; i++) { unsigned long physbase, size; + unsigned long start_pfn, end_pfn; physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; + + start_pfn = physbase >> PAGE_SHIFT; + end_pfn = start_pfn + (size >> PAGE_SHIFT); + memory_present(0, start_pfn, end_pfn); + free_bootmem(physbase, size); } @@ -672,7 +606,7 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } -#endif /* CONFIG_DISCONTIGMEM */ +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static struct kcore_list kcore_vmem; @@ -703,7 +637,7 @@ module_init(setup_kcore); void __init mem_init(void) { -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES int nid; #endif pg_data_t *pgdat; @@ -714,7 +648,7 @@ void __init mem_init(void) num_physpages = max_low_pfn; /* RAM is assumed contiguous */ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); @@ -729,7 +663,7 @@ void __init mem_init(void) for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index ea862ec643d..0b191f2de01 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,6 +440,8 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; + memory_present(numa_domain, start >> PAGE_SHIFT, + (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -481,6 +483,7 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; + memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) @@ -644,7 +647,12 @@ void __init do_init_bootmem(void) new_range: mem_start = read_n_cells(addr_cells, &memcell_buf); mem_size = read_n_cells(size_cells, &memcell_buf); - numa_domain = numa_enabled ? of_node_numa_domain(memory) : 0; + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; if (numa_domain != nid) continue; diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c index df4bbe14153..1b83f002bf2 100644 --- a/arch/ppc64/mm/stab.c +++ b/arch/ppc64/mm/stab.c @@ -18,6 +18,8 @@ #include <asm/mmu_context.h> #include <asm/paca.h> #include <asm/cputable.h> +#include <asm/lmb.h> +#include <asm/abs_addr.h> struct stab_entry { unsigned long esid_data; @@ -224,6 +226,39 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) extern void slb_initialize(void); /* + * Allocate segment tables for secondary CPUs. These must all go in + * the first (bolted) segment, so that do_stab_bolted won't get a + * recursive segment miss on the segment table itself. + */ +void stabs_alloc(void) +{ + int cpu; + + if (cpu_has_feature(CPU_FTR_SLB)) + return; + + for_each_cpu(cpu) { + unsigned long newstab; + + if (cpu == 0) + continue; /* stab for CPU 0 is statically allocated */ + + newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT); + if (! newstab) + panic("Unable to allocate segment table for CPU %d.\n", + cpu); + + newstab += KERNELBASE; + + memset((void *)newstab, 0, PAGE_SIZE); + + paca[cpu].stab_addr = newstab; + paca[cpu].stab_real = virt_to_abs(newstab); + printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); + } +} + +/* * Build an entry for the base kernel segment and put it into * the segment table or SLB. All other segment table or SLB * entries are faulted in. |