summaryrefslogtreecommitdiffstats
path: root/arch/ppc64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ppc64/mm')
-rw-r--r--arch/ppc64/mm/Makefile2
-rw-r--r--arch/ppc64/mm/hash_low.S8
-rw-r--r--arch/ppc64/mm/hash_native.c164
-rw-r--r--arch/ppc64/mm/hash_utils.c22
-rw-r--r--arch/ppc64/mm/hugetlbpage.c230
-rw-r--r--arch/ppc64/mm/imalloc.c20
-rw-r--r--arch/ppc64/mm/init.c122
-rw-r--r--arch/ppc64/mm/numa.c10
-rw-r--r--arch/ppc64/mm/stab.c35
9 files changed, 236 insertions, 377 deletions
diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile
index ac522d57b2a..3695d00d347 100644
--- a/arch/ppc64/mm/Makefile
+++ b/arch/ppc64/mm/Makefile
@@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc
obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \
slb_low.o slb.o stab.o mmap.o
-obj-$(CONFIG_DISCONTIGMEM) += numa.o
+obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o
diff --git a/arch/ppc64/mm/hash_low.S b/arch/ppc64/mm/hash_low.S
index c23d46956dd..fbff24827ae 100644
--- a/arch/ppc64/mm/hash_low.S
+++ b/arch/ppc64/mm/hash_low.S
@@ -170,9 +170,7 @@ htab_insert_pte:
/* Call ppc_md.hpte_insert */
ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* primary slot */
- li r8,0 /* not bolted and not large */
- li r9,0
+ li r6,0 /* no vflags */
_GLOBAL(htab_call_hpte_insert1)
bl . /* Will be patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -192,9 +190,7 @@ _GLOBAL(htab_call_hpte_insert1)
/* Call ppc_md.hpte_insert */
ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
mr r4,r29 /* Retreive va */
- li r6,1 /* secondary slot */
- li r8,0 /* not bolted and not large */
- li r9,0
+ li r6,HPTE_V_SECONDARY@l /* secondary slot */
_GLOBAL(htab_call_hpte_insert2)
bl . /* Will be patched by htab_finish_init() */
cmpdi 0,r3,0
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
index 52b6b930534..a6abd3a979b 100644
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -27,9 +27,9 @@
static DEFINE_SPINLOCK(native_tlbie_lock);
-static inline void native_lock_hpte(HPTE *hptep)
+static inline void native_lock_hpte(hpte_t *hptep)
{
- unsigned long *word = &hptep->dw0.dword0;
+ unsigned long *word = &hptep->v;
while (1) {
if (!test_and_set_bit(HPTE_LOCK_BIT, word))
@@ -39,32 +39,28 @@ static inline void native_lock_hpte(HPTE *hptep)
}
}
-static inline void native_unlock_hpte(HPTE *hptep)
+static inline void native_unlock_hpte(hpte_t *hptep)
{
- unsigned long *word = &hptep->dw0.dword0;
+ unsigned long *word = &hptep->v;
asm volatile("lwsync":::"memory");
clear_bit(HPTE_LOCK_BIT, word);
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, int secondary,
- unsigned long hpteflags, int bolted, int large)
+ unsigned long prpn, unsigned long vflags,
+ unsigned long rflags)
{
unsigned long arpn = physRpn_to_absRpn(prpn);
- HPTE *hptep = htab_address + hpte_group;
- Hpte_dword0 dw0;
- HPTE lhpte;
+ hpte_t *hptep = htab_address + hpte_group;
+ unsigned long hpte_v, hpte_r;
int i;
for (i = 0; i < HPTES_PER_GROUP; i++) {
- dw0 = hptep->dw0.dw0;
-
- if (!dw0.v) {
+ if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
native_lock_hpte(hptep);
- dw0 = hptep->dw0.dw0;
- if (!dw0.v)
+ if (! (hptep->v & HPTE_V_VALID))
break;
native_unlock_hpte(hptep);
}
@@ -75,56 +71,45 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- lhpte.dw1.dword1 = 0;
- lhpte.dw1.dw1.rpn = arpn;
- lhpte.dw1.flags.flags = hpteflags;
-
- lhpte.dw0.dword0 = 0;
- lhpte.dw0.dw0.avpn = va >> 23;
- lhpte.dw0.dw0.h = secondary;
- lhpte.dw0.dw0.bolted = bolted;
- lhpte.dw0.dw0.v = 1;
-
- if (large) {
- lhpte.dw0.dw0.l = 1;
- lhpte.dw0.dw0.avpn &= ~0x1UL;
- }
-
- hptep->dw1.dword1 = lhpte.dw1.dword1;
+ hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
+ if (vflags & HPTE_V_LARGE)
+ va &= ~(1UL << HPTE_V_AVPN_SHIFT);
+ hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+ hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
__asm__ __volatile__ ("eieio" : : : "memory");
-
/*
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
- hptep->dw0.dword0 = lhpte.dw0.dword0;
+ hptep->v = hpte_v;
__asm__ __volatile__ ("ptesync" : : : "memory");
- return i | (secondary << 3);
+ return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
static long native_hpte_remove(unsigned long hpte_group)
{
- HPTE *hptep;
- Hpte_dword0 dw0;
+ hpte_t *hptep;
int i;
int slot_offset;
+ unsigned long hpte_v;
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + hpte_group + slot_offset;
- dw0 = hptep->dw0.dw0;
+ hpte_v = hptep->v;
- if (dw0.v && !dw0.bolted) {
+ if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
/* retry with lock held */
native_lock_hpte(hptep);
- dw0 = hptep->dw0.dw0;
- if (dw0.v && !dw0.bolted)
+ hpte_v = hptep->v;
+ if ((hpte_v & HPTE_V_VALID)
+ && !(hpte_v & HPTE_V_BOLTED))
break;
native_unlock_hpte(hptep);
}
@@ -137,15 +122,15 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->dw0.dword0 = 0;
+ hptep->v = 0;
return i;
}
-static inline void set_pp_bit(unsigned long pp, HPTE *addr)
+static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
{
unsigned long old;
- unsigned long *p = &addr->dw1.dword1;
+ unsigned long *p = &addr->r;
__asm__ __volatile__(
"1: ldarx %0,0,%3\n\
@@ -163,11 +148,11 @@ static inline void set_pp_bit(unsigned long pp, HPTE *addr)
*/
static long native_hpte_find(unsigned long vpn)
{
- HPTE *hptep;
+ hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- Hpte_dword0 dw0;
+ unsigned long hpte_v;
hash = hpt_hash(vpn, 0);
@@ -175,10 +160,11 @@ static long native_hpte_find(unsigned long vpn)
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
- dw0 = hptep->dw0.dw0;
+ hpte_v = hptep->v;
- if ((dw0.avpn == (vpn >> 11)) && dw0.v &&
- (dw0.h == j)) {
+ if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ && (hpte_v & HPTE_V_VALID)
+ && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
if (j)
slot = -slot;
@@ -195,20 +181,21 @@ static long native_hpte_find(unsigned long vpn)
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long va, int large, int local)
{
- HPTE *hptep = htab_address + slot;
- Hpte_dword0 dw0;
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v;
unsigned long avpn = va >> 23;
int ret = 0;
if (large)
- avpn &= ~0x1UL;
+ avpn &= ~1;
native_lock_hpte(hptep);
- dw0 = hptep->dw0.dw0;
+ hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((dw0.avpn != avpn) || !dw0.v) {
+ if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+ || !(hpte_v & HPTE_V_VALID)) {
native_unlock_hpte(hptep);
ret = -1;
} else {
@@ -244,7 +231,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
{
unsigned long vsid, va, vpn, flags = 0;
long slot;
- HPTE *hptep;
+ hpte_t *hptep;
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
@@ -269,26 +256,27 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
int large, int local)
{
- HPTE *hptep = htab_address + slot;
- Hpte_dword0 dw0;
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v;
unsigned long avpn = va >> 23;
unsigned long flags;
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
if (large)
- avpn &= ~0x1UL;
+ avpn &= ~1;
local_irq_save(flags);
native_lock_hpte(hptep);
- dw0 = hptep->dw0.dw0;
+ hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((dw0.avpn != avpn) || !dw0.v) {
+ if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+ || !(hpte_v & HPTE_V_VALID)) {
native_unlock_hpte(hptep);
} else {
/* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->dw0.dword0 = 0;
+ hptep->v = 0;
}
/* Invalidate the tlb */
@@ -304,13 +292,57 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
local_irq_restore(flags);
}
+/*
+ * clear all mappings on kexec. All cpus are in real mode (or they will
+ * be when they isi), and we are the only one left. We rely on our kernel
+ * mapping being 0xC0's and the hardware ignoring those two real bits.
+ *
+ * TODO: add batching support when enabled. remember, no dynamic memory here,
+ * athough there is the control page available...
+ */
+static void native_hpte_clear(void)
+{
+ unsigned long slot, slots, flags;
+ hpte_t *hptep = htab_address;
+ unsigned long hpte_v;
+ unsigned long pteg_count;
+
+ pteg_count = htab_hash_mask + 1;
+
+ local_irq_save(flags);
+
+ /* we take the tlbie lock and hold it. Some hardware will
+ * deadlock if we try to tlbie from two processors at once.
+ */
+ spin_lock(&native_tlbie_lock);
+
+ slots = pteg_count * HPTES_PER_GROUP;
+
+ for (slot = 0; slot < slots; slot++, hptep++) {
+ /*
+ * we could lock the pte here, but we are the only cpu
+ * running, right? and for crash dump, we probably
+ * don't want to wait for a maybe bad cpu.
+ */
+ hpte_v = hptep->v;
+
+ if (hpte_v & HPTE_V_VALID) {
+ hptep->v = 0;
+ tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ }
+ }
+
+ spin_unlock(&native_tlbie_lock);
+ local_irq_restore(flags);
+}
+
static void native_flush_hash_range(unsigned long context,
unsigned long number, int local)
{
unsigned long vsid, vpn, va, hash, secondary, slot, flags, avpn;
int i, j;
- HPTE *hptep;
- Hpte_dword0 dw0;
+ hpte_t *hptep;
+ unsigned long hpte_v;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
/* XXX fix for large ptes */
@@ -346,14 +378,15 @@ static void native_flush_hash_range(unsigned long context,
native_lock_hpte(hptep);
- dw0 = hptep->dw0.dw0;
+ hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((dw0.avpn != avpn) || !dw0.v) {
+ if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
+ || !(hpte_v & HPTE_V_VALID)) {
native_unlock_hpte(hptep);
} else {
/* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->dw0.dword0 = 0;
+ hptep->v = 0;
}
j++;
@@ -415,7 +448,8 @@ void hpte_init_native(void)
ppc_md.hpte_updatepp = native_hpte_updatepp;
ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
+ ppc_md.hpte_remove = native_hpte_remove;
+ ppc_md.hpte_clear_all = native_hpte_clear;
if (tlb_batching_enabled())
ppc_md.flush_hash_range = native_flush_hash_range;
htab_finish_init();
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 0a0f97008d0..623b5d130c3 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -75,8 +75,8 @@
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */
-HPTE *htab_address;
-unsigned long htab_hash_mask;
+hpte_t *htab_address;
+unsigned long htab_hash_mask;
extern unsigned long _SDR1;
@@ -97,11 +97,15 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end,
unsigned long addr;
unsigned int step;
unsigned long tmp_mode;
+ unsigned long vflags;
- if (large)
+ if (large) {
step = 16*MB;
- else
+ vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
+ } else {
step = 4*KB;
+ vflags = HPTE_V_BOLTED;
+ }
for (addr = start; addr < end; addr += step) {
unsigned long vpn, hash, hpteg;
@@ -129,12 +133,12 @@ static inline void create_pte_mapping(unsigned long start, unsigned long end,
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, tmp_mode, 1, large);
+ vflags, tmp_mode);
else
#endif /* CONFIG_PPC_PSERIES */
ret = native_hpte_insert(hpteg, va,
virt_to_abs(addr) >> PAGE_SHIFT,
- 0, tmp_mode, 1, large);
+ vflags, tmp_mode);
if (ret == -1) {
ppc64_terminate_msg(0x20, "create_pte_mapping");
@@ -195,7 +199,7 @@ void __init htab_initialize(void)
memset((void *)table, 0, htab_size_bytes);
}
- mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
+ mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
/* On U3 based machines, we need to reserve the DART area and
* _NOT_ map it to avoid cache paradoxes as it's remapped non
@@ -310,10 +314,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
vsid = get_vsid(mm->context.id, ea);
break;
- case IO_REGION_ID:
- mm = &ioremap_mm;
- vsid = get_kernel_vsid(ea);
- break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index d3bf86a5c1a..f9524602818 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -121,7 +121,7 @@ static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr
return hugepte_offset(dir, addr);
}
-static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pud_t *pud;
@@ -134,7 +134,7 @@ static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return hugepte_offset(pud, addr);
}
-static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pud_t *pud;
@@ -147,25 +147,6 @@ static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return hugepte_alloc(mm, pud, addr);
}
-static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, struct page *page,
- pte_t *ptep, int write_access)
-{
- pte_t entry;
-
- add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
- if (write_access) {
- entry =
- pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
- } else {
- entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
- }
- entry = pte_mkyoung(entry);
- entry = pte_mkhuge(entry);
-
- set_pte_at(mm, addr, ptep, entry);
-}
-
/*
* This function checks for proper alignment of input addr and len parameters.
*/
@@ -259,80 +240,6 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len)
return -EINVAL;
}
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
- struct vm_area_struct *vma)
-{
- pte_t *src_pte, *dst_pte, entry;
- struct page *ptepage;
- unsigned long addr = vma->vm_start;
- unsigned long end = vma->vm_end;
- int err = -ENOMEM;
-
- while (addr < end) {
- dst_pte = huge_pte_alloc(dst, addr);
- if (!dst_pte)
- goto out;
-
- src_pte = huge_pte_offset(src, addr);
- entry = *src_pte;
-
- ptepage = pte_page(entry);
- get_page(ptepage);
- add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
- set_pte_at(dst, addr, dst_pte, entry);
-
- addr += HPAGE_SIZE;
- }
-
- err = 0;
- out:
- return err;
-}
-
-int
-follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
-{
- unsigned long vpfn, vaddr = *position;
- int remainder = *length;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
-
- vpfn = vaddr/PAGE_SIZE;
- while (vaddr < vma->vm_end && remainder) {
- if (pages) {
- pte_t *pte;
- struct page *page;
-
- pte = huge_pte_offset(mm, vaddr);
-
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || pte_none(*pte));
-
- page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
- WARN_ON(!PageCompound(page));
-
- get_page(page);
- pages[i] = page;
- }
-
- if (vmas)
- vmas[i] = vma;
-
- vaddr += PAGE_SIZE;
- ++vpfn;
- --remainder;
- ++i;
- }
-
- *length = remainder;
- *position = vaddr;
-
- return i;
-}
-
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
@@ -363,89 +270,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
return NULL;
}
-void unmap_hugepage_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- struct mm_struct *mm = vma->vm_mm;
- unsigned long addr;
- pte_t *ptep;
- struct page *page;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
- BUG_ON((start % HPAGE_SIZE) != 0);
- BUG_ON((end % HPAGE_SIZE) != 0);
-
- for (addr = start; addr < end; addr += HPAGE_SIZE) {
- pte_t pte;
-
- ptep = huge_pte_offset(mm, addr);
- if (!ptep || pte_none(*ptep))
- continue;
-
- pte = *ptep;
- page = pte_page(pte);
- pte_clear(mm, addr, ptep);
-
- put_page(page);
- }
- add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
- flush_tlb_pending();
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr;
- int ret = 0;
-
- WARN_ON(!is_vm_hugetlb_page(vma));
- BUG_ON((vma->vm_start % HPAGE_SIZE) != 0);
- BUG_ON((vma->vm_end % HPAGE_SIZE) != 0);
-
- spin_lock(&mm->page_table_lock);
- for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
- unsigned long idx;
- pte_t *pte = huge_pte_alloc(mm, addr);
- struct page *page;
-
- if (!pte) {
- ret = -ENOMEM;
- goto out;
- }
- if (! pte_none(*pte))
- continue;
-
- idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
- + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
- page = find_get_page(mapping, idx);
- if (!page) {
- /* charge the fs quota first */
- if (hugetlb_get_quota(mapping)) {
- ret = -ENOMEM;
- goto out;
- }
- page = alloc_huge_page();
- if (!page) {
- hugetlb_put_quota(mapping);
- ret = -ENOMEM;
- goto out;
- }
- ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
- if (! ret) {
- unlock_page(page);
- } else {
- hugetlb_put_quota(mapping);
- free_huge_page(page);
- goto out;
- }
- }
- set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
- }
-out:
- spin_unlock(&mm->page_table_lock);
- return ret;
-}
-
/* Because we have an exclusive hugepage region which lies within the
* normal user address space, we have to take special measures to make
* non-huge mmap()s evade the hugepage reserved regions. */
@@ -468,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
&& !is_hugepage_only_range(mm, addr,len))
return addr;
}
- start_addr = addr = mm->free_area_cache;
+ if (len > mm->cached_hole_size) {
+ start_addr = addr = mm->free_area_cache;
+ } else {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ }
full_search:
vma = find_vma(mm, addr);
@@ -492,6 +321,8 @@ full_search:
mm->free_area_cache = addr + len;
return addr;
}
+ if (addr + mm->cached_hole_size < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
vma = vma->vm_next;
}
@@ -499,6 +330,7 @@ full_search:
/* Make sure we didn't miss any holes */
if (start_addr != TASK_UNMAPPED_BASE) {
start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
goto full_search;
}
return -ENOMEM;
@@ -520,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct vm_area_struct *vma, *prev_vma;
struct mm_struct *mm = current->mm;
unsigned long base = mm->mmap_base, addr = addr0;
+ unsigned long largest_hole = mm->cached_hole_size;
int first_time = 1;
/* requested length too big for entire address space */
@@ -540,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
+ if (len <= largest_hole) {
+ largest_hole = 0;
+ mm->free_area_cache = base;
+ }
try_again:
/* make sure it can fit in the remaining address space */
if (mm->free_area_cache < len)
@@ -568,13 +405,21 @@ hugepage_recheck:
* vma->vm_start, use it:
*/
if (addr+len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end)))
+ (!prev_vma || (addr >= prev_vma->vm_end))) {
/* remember the address as a hint for next time */
- return (mm->free_area_cache = addr);
- else
+ mm->cached_hole_size = largest_hole;
+ return (mm->free_area_cache = addr);
+ } else {
/* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end)
+ if (mm->free_area_cache == vma->vm_end) {
mm->free_area_cache = vma->vm_start;
+ mm->cached_hole_size = largest_hole;
+ }
+ }
+
+ /* remember the largest hole we saw so far */
+ if (addr + largest_hole < vma->vm_start)
+ largest_hole = vma->vm_start - addr;
/* try just below the current vma->vm_start */
addr = vma->vm_start-len;
@@ -587,6 +432,7 @@ fail:
*/
if (first_time) {
mm->free_area_cache = base;
+ largest_hole = 0;
first_time = 0;
goto try_again;
}
@@ -597,11 +443,13 @@ fail:
* allocations.
*/
mm->free_area_cache = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = ~0UL;
addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
/*
* Restore the topdown base:
*/
mm->free_area_cache = base;
+ mm->cached_hole_size = ~0UL;
return addr;
}
@@ -735,7 +583,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
pte_t *ptep;
unsigned long va, vpn;
pte_t old_pte, new_pte;
- unsigned long hpteflags, prpn;
+ unsigned long rflags, prpn;
long slot;
int err = 1;
@@ -778,9 +626,9 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
old_pte = *ptep;
new_pte = old_pte;
- hpteflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- hpteflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
/* Check if pte already has an hpte (case 2) */
if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
@@ -793,7 +641,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
- if (ppc_md.hpte_updatepp(slot, hpteflags, va, 1, local) == -1)
+ if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
}
@@ -813,10 +661,10 @@ repeat:
/* Add in WIMG bits */
/* XXX We should store these in the pte */
- hpteflags |= _PAGE_COHERENT;
+ rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn, 0,
- hpteflags, 0, 1);
+ slot = ppc_md.hpte_insert(hpte_group, va, prpn,
+ HPTE_V_LARGE, rflags);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
@@ -824,7 +672,7 @@ repeat:
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- 1, hpteflags, 0, 1);
+ HPTE_V_LARGE, rflags);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index cb8727f3267..b6e75b891ac 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -15,6 +15,7 @@
#include <asm/pgtable.h>
#include <asm/semaphore.h>
#include <asm/imalloc.h>
+#include <asm/cacheflush.h>
static DECLARE_MUTEX(imlist_sem);
struct vm_struct * imlist = NULL;
@@ -285,29 +286,32 @@ struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
return area;
}
-unsigned long im_free(void * addr)
+void im_free(void * addr)
{
struct vm_struct **p, *tmp;
- unsigned long ret_size = 0;
if (!addr)
- return ret_size;
- if ((PAGE_SIZE-1) & (unsigned long) addr) {
+ return;
+ if ((unsigned long) addr & ~PAGE_MASK) {
printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr);
- return ret_size;
+ return;
}
down(&imlist_sem);
for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
- ret_size = tmp->size;
*p = tmp->next;
+
+ /* XXX: do we need the lock? */
+ spin_lock(&init_mm.page_table_lock);
+ unmap_vm_area(tmp);
+ spin_unlock(&init_mm.page_table_lock);
+
kfree(tmp);
up(&imlist_sem);
- return ret_size;
+ return;
}
}
up(&imlist_sem);
printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
addr);
- return ret_size;
}
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index 4b42aff74d7..e58a24d4287 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -73,9 +73,6 @@ static unsigned long phbs_io_bot = PHBS_IO_BASE;
extern pgd_t swapper_pg_dir[];
extern struct task_struct *current_set[NR_CPUS];
-extern pgd_t ioremap_dir[];
-pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
-
unsigned long klimit = (unsigned long)_end;
unsigned long _SDR1=0;
@@ -101,7 +98,7 @@ void show_mem(void)
printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; i++) {
- page = pgdat->node_mem_map + i;
+ page = pgdat_page_nr(pgdat, i);
total++;
if (PageReserved(page))
reserved++;
@@ -137,69 +134,6 @@ void iounmap(volatile void __iomem *addr)
#else
-static void unmap_im_area_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end)
-{
- pte_t *pte;
-
- pte = pte_offset_kernel(pmd, addr);
- do {
- pte_t ptent = ptep_get_and_clear(&ioremap_mm, addr, pte);
- WARN_ON(!pte_none(ptent) && !pte_present(ptent));
- } while (pte++, addr += PAGE_SIZE, addr != end);
-}
-
-static inline void unmap_im_area_pmd(pud_t *pud, unsigned long addr,
- unsigned long end)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
- unmap_im_area_pte(pmd, addr, next);
- } while (pmd++, addr = next, addr != end);
-}
-
-static inline void unmap_im_area_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- unmap_im_area_pmd(pud, addr, next);
- } while (pud++, addr = next, addr != end);
-}
-
-static void unmap_im_area(unsigned long addr, unsigned long end)
-{
- struct mm_struct *mm = &ioremap_mm;
- unsigned long next;
- pgd_t *pgd;
-
- spin_lock(&mm->page_table_lock);
-
- pgd = pgd_offset_i(addr);
- flush_cache_vunmap(addr, end);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- unmap_im_area_pud(pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
- flush_tlb_kernel_range(start, end);
-
- spin_unlock(&mm->page_table_lock);
-}
-
/*
* map_io_page currently only called by __ioremap
* map_io_page adds an entry to the ioremap page table
@@ -214,21 +148,21 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
unsigned long vsid;
if (mem_init_done) {
- spin_lock(&ioremap_mm.page_table_lock);
- pgdp = pgd_offset_i(ea);
- pudp = pud_alloc(&ioremap_mm, pgdp, ea);
+ spin_lock(&init_mm.page_table_lock);
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
if (!pudp)
return -ENOMEM;
- pmdp = pmd_alloc(&ioremap_mm, pudp, ea);
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
if (!pmdp)
return -ENOMEM;
- ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);
+ ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
if (!ptep)
return -ENOMEM;
pa = abs_to_phys(pa);
- set_pte_at(&ioremap_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
- spin_unlock(&ioremap_mm.page_table_lock);
+ spin_unlock(&init_mm.page_table_lock);
} else {
unsigned long va, vpn, hash, hpteg;
@@ -246,9 +180,10 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
/* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
- 1, 0) == -1) {
+ if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
+ HPTE_V_BOLTED,
+ _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
+ == -1) {
panic("map_io_page: could not insert mapping");
}
}
@@ -267,13 +202,9 @@ static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
for (i = 0; i < size; i += PAGE_SIZE)
if (map_io_page(ea+i, pa+i, flags))
- goto failure;
+ return NULL;
return (void __iomem *) (ea + (addr & ~PAGE_MASK));
- failure:
- if (mem_init_done)
- unmap_im_area(ea, ea + size);
- return NULL;
}
@@ -381,19 +312,14 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea,
*/
void iounmap(volatile void __iomem *token)
{
- unsigned long address, size;
void *addr;
if (!mem_init_done)
return;
addr = (void *) ((unsigned long __force) token & PAGE_MASK);
-
- if ((size = im_free(addr)) == 0)
- return;
- address = (unsigned long)addr;
- unmap_im_area(address, address + size);
+ im_free(addr);
}
static int iounmap_subset_regions(unsigned long addr, unsigned long size)
@@ -606,7 +532,7 @@ EXPORT_SYMBOL(page_is_ram);
* Initialize the bootmem system and give it all the memory we
* have available.
*/
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init do_init_bootmem(void)
{
unsigned long i;
@@ -628,12 +554,20 @@ void __init do_init_bootmem(void)
max_pfn = max_low_pfn;
- /* add all physical memory to the bootmem map. Also find the first */
+ /* Add all physical memory to the bootmem map, mark each area
+ * present.
+ */
for (i=0; i < lmb.memory.cnt; i++) {
unsigned long physbase, size;
+ unsigned long start_pfn, end_pfn;
physbase = lmb.memory.region[i].physbase;
size = lmb.memory.region[i].size;
+
+ start_pfn = physbase >> PAGE_SHIFT;
+ end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ memory_present(0, start_pfn, end_pfn);
+
free_bootmem(physbase, size);
}
@@ -672,7 +606,7 @@ void __init paging_init(void)
free_area_init_node(0, NODE_DATA(0), zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
}
-#endif /* CONFIG_DISCONTIGMEM */
+#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
static struct kcore_list kcore_vmem;
@@ -703,7 +637,7 @@ module_init(setup_kcore);
void __init mem_init(void)
{
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NEED_MULTIPLE_NODES
int nid;
#endif
pg_data_t *pgdat;
@@ -714,7 +648,7 @@ void __init mem_init(void)
num_physpages = max_low_pfn; /* RAM is assumed contiguous */
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NEED_MULTIPLE_NODES
for_each_online_node(nid) {
if (NODE_DATA(nid)->node_spanned_pages != 0) {
printk("freeing bootmem node %x\n", nid);
@@ -729,7 +663,7 @@ void __init mem_init(void)
for_each_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; i++) {
- page = pgdat->node_mem_map + i;
+ page = pgdat_page_nr(pgdat, i);
if (PageReserved(page))
reservedpages++;
}
diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c
index ea862ec643d..0b191f2de01 100644
--- a/arch/ppc64/mm/numa.c
+++ b/arch/ppc64/mm/numa.c
@@ -440,6 +440,8 @@ new_range:
for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
numa_domain;
+ memory_present(numa_domain, start >> PAGE_SHIFT,
+ (start + size) >> PAGE_SHIFT);
if (--ranges)
goto new_range;
@@ -481,6 +483,7 @@ static void __init setup_nonnuma(void)
for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
+ memory_present(0, 0, init_node_data[0].node_end_pfn);
}
static void __init dump_numa_topology(void)
@@ -644,7 +647,12 @@ void __init do_init_bootmem(void)
new_range:
mem_start = read_n_cells(addr_cells, &memcell_buf);
mem_size = read_n_cells(size_cells, &memcell_buf);
- numa_domain = numa_enabled ? of_node_numa_domain(memory) : 0;
+ if (numa_enabled) {
+ numa_domain = of_node_numa_domain(memory);
+ if (numa_domain >= MAX_NUMNODES)
+ numa_domain = 0;
+ } else
+ numa_domain = 0;
if (numa_domain != nid)
continue;
diff --git a/arch/ppc64/mm/stab.c b/arch/ppc64/mm/stab.c
index df4bbe14153..1b83f002bf2 100644
--- a/arch/ppc64/mm/stab.c
+++ b/arch/ppc64/mm/stab.c
@@ -18,6 +18,8 @@
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/cputable.h>
+#include <asm/lmb.h>
+#include <asm/abs_addr.h>
struct stab_entry {
unsigned long esid_data;
@@ -224,6 +226,39 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
extern void slb_initialize(void);
/*
+ * Allocate segment tables for secondary CPUs. These must all go in
+ * the first (bolted) segment, so that do_stab_bolted won't get a
+ * recursive segment miss on the segment table itself.
+ */
+void stabs_alloc(void)
+{
+ int cpu;
+
+ if (cpu_has_feature(CPU_FTR_SLB))
+ return;
+
+ for_each_cpu(cpu) {
+ unsigned long newstab;
+
+ if (cpu == 0)
+ continue; /* stab for CPU 0 is statically allocated */
+
+ newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1<<SID_SHIFT);
+ if (! newstab)
+ panic("Unable to allocate segment table for CPU %d.\n",
+ cpu);
+
+ newstab += KERNELBASE;
+
+ memset((void *)newstab, 0, PAGE_SIZE);
+
+ paca[cpu].stab_addr = newstab;
+ paca[cpu].stab_real = virt_to_abs(newstab);
+ printk(KERN_DEBUG "Segment table for CPU %d at 0x%lx virtual, 0x%lx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real);
+ }
+}
+
+/*
* Build an entry for the base kernel segment and put it into
* the segment table or SLB. All other segment table or SLB
* entries are faulted in.