summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-07-18 02:39:39 -0700
committerDavid S. Miller <davem@davemloft.net>2008-07-18 02:39:39 -0700
commit49997d75152b3d23c53b0fa730599f2f74c92c65 (patch)
tree46e93126170d02cfec9505172e545732c1b69656 /arch/powerpc/mm
parenta0c80b80e0fb48129e4e9d6a9ede914f9ff1850d (diff)
parent5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: Documentation/powerpc/booting-without-of.txt drivers/atm/Makefile drivers/net/fs_enet/fs_enet-main.c drivers/pci/pci-acpi.c net/8021q/vlan.c net/iucv/iucv.c
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/44x_mmu.c29
-rw-r--r--arch/powerpc/mm/fault.c3
-rw-r--r--arch/powerpc/mm/hash_low_64.S17
-rw-r--r--arch/powerpc/mm/hash_utils_64.c42
-rw-r--r--arch/powerpc/mm/hugetlbpage.c13
-rw-r--r--arch/powerpc/mm/init_32.c7
-rw-r--r--arch/powerpc/mm/init_64.c4
-rw-r--r--arch/powerpc/mm/mem.c9
-rw-r--r--arch/powerpc/mm/mmu_decl.h6
-rw-r--r--arch/powerpc/mm/numa.c310
-rw-r--r--arch/powerpc/mm/pgtable_32.c6
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c27
-rw-r--r--arch/powerpc/mm/slice.c177
-rw-r--r--arch/powerpc/mm/stab.c4
-rw-r--r--arch/powerpc/mm/tlb_64.c9
15 files changed, 495 insertions, 168 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 953fb919eb0..98052ac9658 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -27,6 +27,7 @@
#include <asm/mmu.h>
#include <asm/system.h>
#include <asm/page.h>
+#include <asm/cacheflush.h>
#include "mmu_decl.h"
@@ -37,11 +38,35 @@ unsigned int tlb_44x_index; /* = 0 */
unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
int icache_44x_need_flush;
+static void __init ppc44x_update_tlb_hwater(void)
+{
+ extern unsigned int tlb_44x_patch_hwater_D[];
+ extern unsigned int tlb_44x_patch_hwater_I[];
+
+ /* The TLB miss handlers hard codes the watermark in a cmpli
+ * instruction to improve performances rather than loading it
+ * from the global variable. Thus, we patch the instructions
+ * in the 2 TLB miss handlers when updating the value
+ */
+ tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
+ tlb_44x_hwater;
+ flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
+ (unsigned long)&tlb_44x_patch_hwater_D[1]);
+ tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
+ tlb_44x_hwater;
+ flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
+ (unsigned long)&tlb_44x_patch_hwater_I[1]);
+}
+
/*
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem
*/
static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
{
+ unsigned int entry = tlb_44x_hwater--;
+
+ ppc44x_update_tlb_hwater();
+
__asm__ __volatile__(
"tlbwe %2,%3,%4\n"
"tlbwe %1,%3,%5\n"
@@ -50,7 +75,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
"r" (phys),
"r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
- "r" (tlb_44x_hwater--), /* slot for this TLB entry */
+ "r" (entry),
"i" (PPC44x_TLB_PAGEID),
"i" (PPC44x_TLB_XLAT),
"i" (PPC44x_TLB_ATTRIB));
@@ -58,6 +83,8 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
void __init MMU_init_hw(void)
{
+ ppc44x_update_tlb_hwater();
+
flush_instruction_cache();
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7b251079926..1707d00331f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -306,7 +306,8 @@ good_area:
flush_dcache_icache_page(page);
set_bit(PG_arch_1, &page->flags);
}
- pte_update(ptep, 0, _PAGE_HWEXEC);
+ pte_update(ptep, 0, _PAGE_HWEXEC |
+ _PAGE_ACCESSED);
_tlbie(address, mm->context.id);
pte_unmap_unlock(ptep, ptl);
up_read(&mm->mmap_sem);
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 70f4c833fa3..a719f53921a 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -388,7 +388,7 @@ _GLOBAL(__hash_page_4K)
*/
rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
- ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
oris r30,r30,_PAGE_COMBO@h
/* Write the linux PTE atomically (setting busy) */
stdcx. r30,0,r6
@@ -468,7 +468,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
* go to out-of-line code to try to modify the HPTE. We look for
* the bit at (1 >> (index + 32))
*/
- andi. r0,r31,_PAGE_HASHPTE
+ rldicl. r0,r31,64-12,48
li r26,0 /* Default hidx */
beq htab_insert_pte
@@ -726,11 +726,11 @@ BEGIN_FTR_SECTION
bne- ht64_bail_ok
END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
/* Prepare new PTE value (turn access RW into DIRTY, then
- * add BUSY,HASHPTE and ACCESSED)
+ * add BUSY and ACCESSED)
*/
rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
or r30,r30,r31
- ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
/* Write the linux PTE atomically (setting busy) */
stdcx. r30,0,r6
bne- 1b
@@ -798,18 +798,21 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
/* Check if we may already be in the hashtable, in this case, we
* go to out-of-line code to try to modify the HPTE
*/
- andi. r0,r31,_PAGE_HASHPTE
+ rldicl. r0,r31,64-12,48
bne ht64_modify_pte
ht64_insert_pte:
/* Clear hpte bits in new pte (we also clear BUSY btw) and
- * add _PAGE_HASHPTE
+ * add _PAGE_HPTE_SUB0
*/
lis r0,_PAGE_HPTEFLAGS@h
ori r0,r0,_PAGE_HPTEFLAGS@l
andc r30,r30,r0
+#ifdef CONFIG_PPC_64K_PAGES
+ oris r30,r30,_PAGE_HPTE_SUB0@h
+#else
ori r30,r30,_PAGE_HASHPTE
-
+#endif
/* Phyical address in r5 */
rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
sldi r5,r5,PAGE_SHIFT
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0f2d239d94c..8d3b58ebd38 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(linear_map_hash_lock);
/* Pre-POWER4 CPUs (4k pages only)
*/
-struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults_old[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
@@ -134,7 +134,7 @@ struct mmu_psize_def mmu_psize_defaults_old[] = {
*
* Support for 16Mb large pages
*/
-struct mmu_psize_def mmu_psize_defaults_gp[] = {
+static struct mmu_psize_def mmu_psize_defaults_gp[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
@@ -533,8 +533,6 @@ void __init htab_initialize(void)
unsigned long base = 0, size = 0, limit;
int i;
- extern unsigned long tce_alloc_start, tce_alloc_end;
-
DBG(" -> htab_initialize()\n");
/* Initialize segment sizes */
@@ -697,6 +695,28 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
return pp;
}
+#ifdef CONFIG_PPC_MM_SLICES
+unsigned int get_paca_psize(unsigned long addr)
+{
+ unsigned long index, slices;
+
+ if (addr < SLICE_LOW_TOP) {
+ slices = get_paca()->context.low_slices_psize;
+ index = GET_LOW_SLICE_INDEX(addr);
+ } else {
+ slices = get_paca()->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ }
+ return (slices >> (index * 4)) & 0xF;
+}
+
+#else
+unsigned int get_paca_psize(unsigned long addr)
+{
+ return get_paca()->context.user_psize;
+}
+#endif
+
/*
* Demote a segment to using 4k pages.
* For now this makes the whole process use 4k pages.
@@ -704,13 +724,13 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
#ifdef CONFIG_PPC_64K_PAGES
void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
- if (mm->context.user_psize == MMU_PAGE_4K)
+ if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
return;
- slice_set_user_psize(mm, MMU_PAGE_4K);
+ slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
#ifdef CONFIG_SPU_BASE
spu_flush_all_slbs(mm);
#endif
- if (get_paca()->context.user_psize != MMU_PAGE_4K) {
+ if (get_paca_psize(addr) != MMU_PAGE_4K) {
get_paca()->context = mm->context;
slb_flush_and_rebolt();
}
@@ -794,11 +814,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
DBG_LOW(" user region with no mm !\n");
return 1;
}
-#ifdef CONFIG_PPC_MM_SLICES
psize = get_slice_psize(mm, ea);
-#else
- psize = mm->context.user_psize;
-#endif
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
break;
@@ -870,7 +886,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
- if (pte_val(*ptep) & _PAGE_4K_PFN) {
+ if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
@@ -899,7 +915,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
}
}
if (user_region) {
- if (psize != get_paca()->context.user_psize) {
+ if (psize != get_paca_psize(ea)) {
get_paca()->context = mm->context;
slb_flush_and_rebolt();
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a02266dad21..0d12fba31bc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -458,8 +458,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
old_pte = pte_val(*ptep);
if (old_pte & _PAGE_BUSY)
goto out;
- new_pte = old_pte | _PAGE_BUSY |
- _PAGE_ACCESSED | _PAGE_HASHPTE;
+ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
old_pte, new_pte));
@@ -499,12 +498,14 @@ repeat:
HPTES_PER_GROUP) & ~0x7UL;
/* clear HPTE slot informations in new PTE */
+#ifdef CONFIG_PPC_64K_PAGES
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
+#else
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-
+#endif
/* Add in WIMG bits */
- /* XXX We should store these in the pte */
- /* --BenH: I think they are ... */
- rflags |= _PAGE_COHERENT;
+ rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+ _PAGE_COHERENT | _PAGE_GUARDED));
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 1952b4d3fa7..388ceda632f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -43,6 +43,7 @@
#include <asm/btext.h>
#include <asm/tlb.h>
#include <asm/sections.h>
+#include <asm/system.h>
#include "mmu_decl.h"
@@ -56,8 +57,8 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long total_memory;
-unsigned long total_lowmem;
+phys_addr_t total_memory;
+phys_addr_t total_lowmem;
phys_addr_t memstart_addr = (phys_addr_t)~0ull;
EXPORT_SYMBOL(memstart_addr);
@@ -76,8 +77,6 @@ void MMU_init(void);
/* XXX should be in current.h -- paulus */
extern struct task_struct *current_set[NR_CPUS];
-extern int init_bootmem_done;
-
/*
* this tells the system to map all of ram with the segregs
* (i.e. page tables) instead of the bats.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6aa65375abf..6ef63caca68 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -185,7 +185,7 @@ void pgtable_cache_init(void)
* do this by hand as the proffered address may not be correctly aligned.
* Subtraction of non-aligned pointers produces undefined results.
*/
-unsigned long __meminit vmemmap_section_start(unsigned long page)
+static unsigned long __meminit vmemmap_section_start(unsigned long page)
{
unsigned long offset = page - ((unsigned long)(vmemmap));
@@ -198,7 +198,7 @@ unsigned long __meminit vmemmap_section_start(unsigned long page)
* which overlaps this vmemmap page is initialised then this page is
* initialised already.
*/
-int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long start, int page_size)
{
unsigned long end = start + page_size;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 51f82d83bf1..1ca2235f096 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -44,6 +44,7 @@
#include <asm/btext.h>
#include <asm/tlb.h>
#include <asm/sections.h>
+#include <asm/sparsemem.h>
#include <asm/vdso.h>
#include <asm/fixmap.h>
@@ -329,7 +330,7 @@ static int __init mark_nonram_nosave(void)
void __init paging_init(void)
{
unsigned long total_ram = lmb_phys_mem_size();
- unsigned long top_of_ram = lmb_end_of_DRAM();
+ phys_addr_t top_of_ram = lmb_end_of_DRAM();
unsigned long max_zone_pfns[MAX_NR_ZONES];
#ifdef CONFIG_PPC32
@@ -348,10 +349,10 @@ void __init paging_init(void)
kmap_prot = PAGE_KERNEL;
#endif /* CONFIG_HIGHMEM */
- printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
- top_of_ram, total_ram);
+ printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n",
+ (u64)top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
- (top_of_ram - total_ram) >> 20);
+ (long int)((top_of_ram - total_ram) >> 20));
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 04802252a64..fab3cfad409 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -29,7 +29,7 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea,
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
extern int map_page(unsigned long va, phys_addr_t pa, int flags);
-extern void setbat(int index, unsigned long virt, unsigned long phys,
+extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, int flags);
extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, int flags, unsigned int pid);
@@ -49,8 +49,8 @@ extern unsigned int num_tlbcam_entries;
extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
extern phys_addr_t __initial_memory_limit_addr;
-extern unsigned long total_memory;
-extern unsigned long total_lowmem;
+extern phys_addr_t total_memory;
+extern phys_addr_t total_lowmem;
extern phys_addr_t memstart_addr;
extern phys_addr_t lowmem_end_addr;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index dc704da363e..cf4bffba6f7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -268,6 +268,144 @@ static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
return result;
}
+struct of_drconf_cell {
+ u64 base_addr;
+ u32 drc_index;
+ u32 reserved;
+ u32 aa_index;
+ u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
+/*
+ * Read the next lmb list entry from the ibm,dynamic-memory property
+ * and return the information in the provided of_drconf_cell structure.
+ */
+static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
+{
+ const u32 *cp;
+
+ drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
+
+ cp = *cellp;
+ drmem->drc_index = cp[0];
+ drmem->reserved = cp[1];
+ drmem->aa_index = cp[2];
+ drmem->flags = cp[3];
+
+ *cellp = cp + 4;
+}
+
+/*
+ * Retreive and validate the ibm,dynamic-memory property of the device tree.
+ *
+ * The layout of the ibm,dynamic-memory property is a number N of lmb
+ * list entries followed by N lmb list entries. Each lmb list entry
+ * contains information as layed out in the of_drconf_cell struct above.
+ */
+static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
+{
+ const u32 *prop;
+ u32 len, entries;
+
+ prop = of_get_property(memory, "ibm,dynamic-memory", &len);
+ if (!prop || len < sizeof(unsigned int))
+ return 0;
+
+ entries = *prop++;
+
+ /* Now that we know the number of entries, revalidate the size
+ * of the property read in to ensure we have everything
+ */
+ if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
+ return 0;
+
+ *dm = prop;
+ return entries;
+}
+
+/*
+ * Retreive and validate the ibm,lmb-size property for drconf memory
+ * from the device tree.
+ */
+static u64 of_get_lmb_size(struct device_node *memory)
+{
+ const u32 *prop;
+ u32 len;
+
+ prop = of_get_property(memory, "ibm,lmb-size", &len);
+ if (!prop || len < sizeof(unsigned int))
+ return 0;
+
+ return read_n_cells(n_mem_size_cells, &prop);
+}
+
+struct assoc_arrays {
+ u32 n_arrays;
+ u32 array_sz;
+ const u32 *arrays;
+};
+
+/*
+ * Retreive and validate the list of associativity arrays for drconf
+ * memory from the ibm,associativity-lookup-arrays property of the
+ * device tree..
+ *
+ * The layout of the ibm,associativity-lookup-arrays property is a number N
+ * indicating the number of associativity arrays, followed by a number M
+ * indicating the size of each associativity array, followed by a list
+ * of N associativity arrays.
+ */
+static int of_get_assoc_arrays(struct device_node *memory,
+ struct assoc_arrays *aa)
+{
+ const u32 *prop;
+ u32 len;
+
+ prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
+ if (!prop || len < 2 * sizeof(unsigned int))
+ return -1;
+
+ aa->n_arrays = *prop++;
+ aa->array_sz = *prop++;
+
+ /* Now that we know the number of arrrays and size of each array,
+ * revalidate the size of the property read in.
+ */
+ if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
+ return -1;
+
+ aa->arrays = prop;
+ return 0;
+}
+
+/*
+ * This is like of_node_to_nid_single() for memory represented in the
+ * ibm,dynamic-reconfiguration-memory node.
+ */
+static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
+ struct assoc_arrays *aa)
+{
+ int default_nid = 0;
+ int nid = default_nid;
+ int index;
+
+ if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
+ !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
+ drmem->aa_index < aa->n_arrays) {
+ index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
+ nid = aa->arrays[index];
+
+ if (nid == 0xffff || nid >= MAX_NUMNODES)
+ nid = default_nid;
+ }
+
+ return nid;
+}
+
/*
* Figure out to which domain a cpu belongs and stick it there.
* Return the id of the domain used.
@@ -355,57 +493,50 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
*/
static void __init parse_drconf_memory(struct device_node *memory)
{
- const unsigned int *lm, *dm, *aa;
- unsigned int ls, ld, la;
- unsigned int n, aam, aalen;
- unsigned long lmb_size, size, start;
- int nid, default_nid = 0;
- unsigned int ai, flags;
-
- lm = of_get_property(memory, "ibm,lmb-size", &ls);
- dm = of_get_property(memory, "ibm,dynamic-memory", &ld);
- aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la);
- if (!lm || !dm || !aa ||
- ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
- la < 2 * sizeof(unsigned int))
+ const u32 *dm;
+ unsigned int n, rc;
+ unsigned long lmb_size, size;
+ int nid;
+ struct assoc_arrays aa;
+
+ n = of_get_drconf_memory(memory, &dm);
+ if (!n)
+ return;
+
+ lmb_size = of_get_lmb_size(memory);
+ if (!lmb_size)
return;
- lmb_size = read_n_cells(n_mem_size_cells, &lm);
- n = *dm++; /* number of LMBs */
- aam = *aa++; /* number of associativity lists */
- aalen = *aa++; /* length of each associativity list */
- if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) ||
- la < (aam * aalen + 2) * sizeof(unsigned int))
+ rc = of_get_assoc_arrays(memory, &aa);
+ if (rc)
return;
for (; n != 0; --n) {
- start = read_n_cells(n_mem_addr_cells, &dm);
- ai = dm[2];
- flags = dm[3];
- dm += 4;
- /* 0x80 == reserved, 0x8 = assigned to us */
- if ((flags & 0x80) || !(flags & 0x8))
+ struct of_drconf_cell drmem;
+
+ read_drconf_cell(&drmem, &dm);
+
+ /* skip this block if the reserved bit is set in flags (0x80)
+ or if the block is not assigned to this partition (0x8) */
+ if ((drmem.flags & DRCONF_MEM_RESERVED)
+ || !(drmem.flags & DRCONF_MEM_ASSIGNED))
continue;
- nid = default_nid;
- /* flags & 0x40 means associativity index is invalid */
- if (min_common_depth > 0 && min_common_depth <= aalen &&
- (flags & 0x40) == 0 && ai < aam) {
- /* this is like of_node_to_nid_single */
- nid = aa[ai * aalen + min_common_depth - 1];
- if (nid == 0xffff || nid >= MAX_NUMNODES)
- nid = default_nid;
- }
- fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT),
- &nid);
+ nid = of_drconf_to_nid_single(&drmem, &aa);
+
+ fake_numa_create_new_node(
+ ((drmem.base_addr + lmb_size) >> PAGE_SHIFT),
+ &nid);
+
node_set_online(nid);
- size = numa_enforce_memory_limit(start, lmb_size);
+ size = numa_enforce_memory_limit(drmem.base_addr, lmb_size);
if (!size)
continue;
- add_active_range(nid, start >> PAGE_SHIFT,
- (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
+ add_active_range(nid, drmem.base_addr >> PAGE_SHIFT,
+ (drmem.base_addr >> PAGE_SHIFT)
+ + (size >> PAGE_SHIFT));
}
}
@@ -770,6 +901,79 @@ early_param("numa", early_numa);
#ifdef CONFIG_MEMORY_HOTPLUG
/*
+ * Validate the node associated with the memory section we are
+ * trying to add.
+ */
+int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size,
+ unsigned long scn_addr)
+{
+ nodemask_t nodes;
+
+ if (*nid < 0 || !node_online(*nid))
+ *nid = any_online_node(NODE_MASK_ALL);
+
+ if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) {
+ nodes_setall(nodes);
+ while (NODE_DATA(*nid)->node_spanned_pages == 0) {
+ node_clear(*nid, nodes);
+ *nid = any_online_node(nodes);
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Find the node associated with a hot added memory section represented
+ * by the ibm,dynamic-reconfiguration-memory node.
+ */
+static int hot_add_drconf_scn_to_nid(struct device_node *memory,
+ unsigned long scn_addr)
+{
+ const u32 *dm;
+ unsigned int n, rc;
+ unsigned long lmb_size;
+ int default_nid = any_online_node(NODE_MASK_ALL);
+ int nid;
+ struct assoc_arrays aa;
+
+ n = of_get_drconf_memory(memory, &dm);
+ if (!n)
+ return default_nid;;
+
+ lmb_size = of_get_lmb_size(memory);
+ if (!lmb_size)
+ return default_nid;
+
+ rc = of_get_assoc_arrays(memory, &aa);
+ if (rc)
+ return default_nid;
+
+ for (; n != 0; --n) {
+ struct of_drconf_cell drmem;
+
+ read_drconf_cell(&drmem, &dm);
+
+ /* skip this block if it is reserved or not assigned to
+ * this partition */
+ if ((drmem.flags & DRCONF_MEM_RESERVED)
+ || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+ continue;
+
+ nid = of_drconf_to_nid_single(&drmem, &aa);
+
+ if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size,
+ scn_addr))
+ return nid;
+ }
+
+ BUG(); /* section address should be found above */
+ return 0;
+}
+
+/*
* Find the node associated with a hot added memory section. Section
* corresponds to a SPARSEMEM section, not an LMB. It is assumed that
* sections are fully contained within a single LMB.
@@ -777,12 +981,17 @@ early_param("numa", early_numa);
int hot_add_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory = NULL;
- nodemask_t nodes;
- int default_nid = any_online_node(NODE_MASK_ALL);
int nid;
if (!numa_enabled || (min_common_depth < 0))
- return default_nid;
+ return any_online_node(NODE_MASK_ALL);
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+ of_node_put(memory);
+ return nid;
+ }
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long start, size;
@@ -801,13 +1010,9 @@ ha_new_range:
size = read_n_cells(n_mem_size_cells, &memcell_buf);
nid = of_node_to_nid_single(memory);
- /* Domains not present at boot default to 0 */
- if (nid < 0 || !node_online(nid))
- nid = default_nid;
-
- if ((scn_addr >= start) && (scn_addr < (start + size))) {
+ if (valid_hot_add_scn(&nid, start, size, scn_addr)) {
of_node_put(memory);
- goto got_nid;
+ return nid;
}
if (--ranges) /* process all ranges in cell */
@@ -815,14 +1020,5 @@ ha_new_range:
}
BUG(); /* section address should be found above */
return 0;
-
- /* Temporary code to ensure that returned node is not empty */
-got_nid:
- nodes_setall(nodes);
- while (NODE_DATA(nid)->node_spanned_pages == 0) {
- node_clear(nid, nodes);
- nid = any_online_node(nodes);
- }
- return nid;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index e0ff59f2113..c7584072dfc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -53,9 +53,9 @@ extern void hash_page_sync(void);
#endif
#ifdef HAVE_BATS
-extern unsigned long v_mapped_by_bats(unsigned long va);
-extern unsigned long p_mapped_by_bats(unsigned long pa);
-void setbat(int index, unsigned long virt, unsigned long phys,
+extern phys_addr_t v_mapped_by_bats(unsigned long va);
+extern unsigned long p_mapped_by_bats(phys_addr_t pa);
+void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, int flags);
#else /* !HAVE_BATS */
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9f156874..c53145f6194 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -38,21 +38,18 @@ struct hash_pte *Hash, *Hash_end;
unsigned long Hash_size, Hash_mask;
unsigned long _SDR1;
-union ubat { /* BAT register values to be loaded */
- struct ppc_bat bat;
- u32 word[2];
-} BATS[8][2]; /* 8 pairs of IBAT, DBAT */
+struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
struct batrange { /* stores address ranges mapped by BATs */
unsigned long start;
unsigned long limit;
- unsigned long phys;
+ phys_addr_t phys;
} bat_addrs[8];
/*
* Return PA for this VA if it is mapped by a BAT, or 0
*/
-unsigned long v_mapped_by_bats(unsigned long va)
+phys_addr_t v_mapped_by_bats(unsigned long va)
{
int b;
for (b = 0; b < 4; ++b)
@@ -64,7 +61,7 @@ unsigned long v_mapped_by_bats(unsigned long va)
/*
* Return VA for a given PA or 0 if not mapped
*/
-unsigned long p_mapped_by_bats(unsigned long pa)
+unsigned long p_mapped_by_bats(phys_addr_t pa)
{
int b;
for (b = 0; b < 4; ++b)
@@ -119,12 +116,12 @@ unsigned long __init mmu_mapin_ram(void)
* The parameters are not checked; in particular size must be a power
* of 2 between 128k and 256M.
*/
-void __init setbat(int index, unsigned long virt, unsigned long phys,
+void __init setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, int flags)
{
unsigned int bl;
int wimgxpp;
- union ubat *bat = BATS[index];
+ struct ppc_bat *bat = BATS[index];
if (((flags & _PAGE_NO_CACHE) == 0) &&
cpu_has_feature(CPU_FTR_NEED_COHERENT))
@@ -137,15 +134,15 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
| _PAGE_COHERENT | _PAGE_GUARDED);
wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
- bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
- bat[1].word[1] = phys | wimgxpp;
+ bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
#ifndef CONFIG_KGDB /* want user access for breakpoints */
if (flags & _PAGE_USER)
#endif
- bat[1].bat.batu.vp = 1;
+ bat[1].batu |= 1; /* Vp = 1 */
if (flags & _PAGE_GUARDED) {
/* G bit must be zero in IBATs */
- bat[0].word[0] = bat[0].word[1] = 0;
+ bat[0].batu = bat[0].batl = 0;
} else {
/* make IBAT same as DBAT */
bat[0] = bat[1];
@@ -158,8 +155,8 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
| _PAGE_COHERENT);
wimgxpp |= (flags & _PAGE_RW)?
((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
- bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
- bat->word[1] = phys | bl | 0x40; /* V=1 */
+ bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
+ bat->batl = phys | bl | 0x40; /* V=1 */
}
bat_addrs[index].start = virt;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index ad928edafb0..db44e02e045 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -215,10 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
mm->context.high_slices_psize);
spin_unlock_irqrestore(&slice_convert_lock, flags);
- mb();
- /* XXX this is sub-optimal but will do for now */
- on_each_cpu(slice_flush_segments, mm, 0, 1);
#ifdef CONFIG_SPU_BASE
spu_flush_all_slbs(mm);
#endif
@@ -384,17 +381,34 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
}
+#define or_mask(dst, src) do { \
+ (dst).low_slices |= (src).low_slices; \
+ (dst).high_slices |= (src).high_slices; \
+} while (0)
+
+#define andnot_mask(dst, src) do { \
+ (dst).low_slices &= ~(src).low_slices; \
+ (dst).high_slices &= ~(src).high_slices; \
+} while (0)
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define MMU_PAGE_BASE MMU_PAGE_64K
+#else
+#define MMU_PAGE_BASE MMU_PAGE_4K
+#endif
+
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
int topdown, int use_cache)
{
- struct slice_mask mask;
+ struct slice_mask mask = {0, 0};
struct slice_mask good_mask;
struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
- int pmask_set = 0;
+ struct slice_mask compat_mask = {0, 0};
int fixed = (flags & MAP_FIXED);
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
struct mm_struct *mm = current->mm;
+ unsigned long newaddr;
/* Sanity checks */
BUG_ON(mm->task_size == 0);
@@ -416,21 +430,48 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
if (!fixed && addr) {
addr = _ALIGN_UP(addr, 1ul << pshift);
slice_dbg(" aligned addr=%lx\n", addr);
+ /* Ignore hint if it's too large or overlaps a VMA */
+ if (addr > mm->task_size - len ||
+ !slice_area_is_free(mm, addr, len))
+ addr = 0;
}
- /* First makeup a "good" mask of slices that have the right size
+ /* First make up a "good" mask of slices that have the right size
* already
*/
good_mask = slice_mask_for_size(mm, psize);
slice_print_mask(" good_mask", good_mask);
- /* First check hint if it's valid or if we have MAP_FIXED */
- if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+ /*
+ * Here "good" means slices that are already the right page size,
+ * "compat" means slices that have a compatible page size (i.e.
+ * 4k in a 64k pagesize kernel), and "free" means slices without
+ * any VMAs.
+ *
+ * If MAP_FIXED:
+ * check if fits in good | compat => OK
+ * check if fits in good | compat | free => convert free
+ * else bad
+ * If have hint:
+ * check if hint fits in good => OK
+ * check if hint fits in good | free => convert free
+ * Otherwise:
+ * search in good, found => OK
+ * search in good | free, found => convert free
+ * search in good | compat | free, found => convert free.
+ */
- /* Don't bother with hint if it overlaps a VMA */
- if (!fixed && !slice_area_is_free(mm, addr, len))
- goto search;
+#ifdef CONFIG_PPC_64K_PAGES
+ /* If we support combo pages, we can allow 64k pages in 4k slices */
+ if (psize == MMU_PAGE_64K) {
+ compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+ if (fixed)
+ or_mask(good_mask, compat_mask);
+ }
+#endif
+ /* First check hint if it's valid or if we have MAP_FIXED */
+ if (addr != 0 || fixed) {
/* Build a mask for the requested range */
mask = slice_range_to_mask(addr, len);
slice_print_mask(" mask", mask);
@@ -442,54 +483,66 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
slice_dbg(" fits good !\n");
return addr;
}
-
- /* We don't fit in the good mask, check what other slices are
- * empty and thus can be converted
+ } else {
+ /* Now let's see if we can find something in the existing
+ * slices for that size
*/
- potential_mask = slice_mask_for_free(mm);
- potential_mask.low_slices |= good_mask.low_slices;
- potential_mask.high_slices |= good_mask.high_slices;
- pmask_set = 1;
- slice_print_mask(" potential", potential_mask);
- if (slice_check_fit(mask, potential_mask)) {
- slice_dbg(" fits potential !\n");
- goto convert;
+ newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
+ use_cache);
+ if (newaddr != -ENOMEM) {
+ /* Found within the good mask, we don't have to setup,
+ * we thus return directly
+ */
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ return newaddr;
}
}
- /* If we have MAP_FIXED and failed the above step, then error out */
+ /* We don't fit in the good mask, check what other slices are
+ * empty and thus can be converted
+ */
+ potential_mask = slice_mask_for_free(mm);
+ or_mask(potential_mask, good_mask);
+ slice_print_mask(" potential", potential_mask);
+
+ if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
+ slice_dbg(" fits potential !\n");
+ goto convert;
+ }
+
+ /* If we have MAP_FIXED and failed the above steps, then error out */
if (fixed)
return -EBUSY;
- search:
slice_dbg(" search...\n");
- /* Now let's see if we can find something in the existing slices
- * for that size
+ /* If we had a hint that didn't work out, see if we can fit
+ * anywhere in the good area.
*/
- addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
- if (addr != -ENOMEM) {
- /* Found within the good mask, we don't have to setup,
- * we thus return directly
- */
- slice_dbg(" found area at 0x%lx\n", addr);
- return addr;
- }
-
- /* Won't fit, check what can be converted */
- if (!pmask_set) {
- potential_mask = slice_mask_for_free(mm);
- potential_mask.low_slices |= good_mask.low_slices;
- potential_mask.high_slices |= good_mask.high_slices;
- pmask_set = 1;
- slice_print_mask(" potential", potential_mask);
+ if (addr) {
+ addr = slice_find_area(mm, len, good_mask, psize, topdown,
+ use_cache);
+ if (addr != -ENOMEM) {
+ slice_dbg(" found area at 0x%lx\n", addr);
+ return addr;
+ }
}
/* Now let's see if we can find something in the existing slices
- * for that size
+ * for that size plus free slices
*/
addr = slice_find_area(mm, len, potential_mask, psize, topdown,
use_cache);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+ /* retry the search with 4k-page slices included */
+ or_mask(potential_mask, compat_mask);
+ addr = slice_find_area(mm, len, potential_mask, psize,
+ topdown, use_cache);
+ }
+#endif
+
if (addr == -ENOMEM)
return -ENOMEM;
@@ -498,7 +551,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
slice_print_mask(" mask", mask);
convert:
- slice_convert(mm, mask, psize);
+ andnot_mask(mask, good_mask);
+ andnot_mask(mask, compat_mask);
+ if (mask.low_slices || mask.high_slices) {
+ slice_convert(mm, mask, psize);
+ if (psize > MMU_PAGE_BASE)
+ on_each_cpu(slice_flush_segments, mm, 1);
+ }
return addr;
}
@@ -598,6 +657,36 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
spin_unlock_irqrestore(&slice_convert_lock, flags);
}
+void slice_set_psize(struct mm_struct *mm, unsigned long address,
+ unsigned int psize)
+{
+ unsigned long i, flags;
+ u64 *p;
+
+ spin_lock_irqsave(&slice_convert_lock, flags);
+ if (address < SLICE_LOW_TOP) {
+ i = GET_LOW_SLICE_INDEX(address);
+ p = &mm->context.low_slices_psize;
+ } else {
+ i = GET_HIGH_SLICE_INDEX(address);
+ p = &mm->context.high_slices_psize;
+ }
+ *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+}
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize)
+{
+ struct slice_mask mask = slice_range_to_mask(start, len);
+
+ slice_convert(mm, mask, psize);
+}
+
/*
* is_hugepage_only_range() is used by generic code to verify wether
* a normal mmap mapping (non hugetlbfs) is valid on a given area.
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index efbbd13d93e..60e6032a808 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -30,8 +30,8 @@ struct stab_entry {
};
#define NR_STAB_CACHE_ENTRIES 8
-DEFINE_PER_CPU(long, stab_cache_ptr);
-DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
+static DEFINE_PER_CPU(long, stab_cache_ptr);
+static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
/*
* Create a segment table entry for the given esid/vsid pair.
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index e2d867ce1c7..a01b5c608ff 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -37,8 +37,8 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
* include/asm-powerpc/tlb.h file -- tgall
*/
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
-unsigned long pte_freelist_forced_free;
+static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+static unsigned long pte_freelist_forced_free;
struct pte_freelist_batch
{
@@ -47,9 +47,6 @@ struct pte_freelist_batch
pgtable_free_t tables[0];
};
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
-unsigned long pte_freelist_forced_free;
-
#define PTE_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
/ sizeof(pgtable_free_t))
@@ -66,7 +63,7 @@ static void pgtable_free_now(pgtable_free_t pgf)
{
pte_freelist_forced_free++;
- smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+ smp_call_function(pte_free_smp_sync, NULL, 1);
pgtable_free(pgf);
}