From c555e520ef794a94dc36a8ded93ece6369ff7ca0 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 9 Dec 2008 08:21:32 +0000
Subject: powerpc/mm: Add better comment on careful_allocation()

The behavior in careful_allocation() really confused me at first.
Add a comment to hopefully make it easier on the next doofus that
looks at it.

Signed-off-by: Dave Hansen
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/mm/numa.c')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf81049e1e5..213664c9cdc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -840,8 +840,16 @@ static void __init *careful_allocation(int nid, unsigned long size,
 			size, nid);
 
 	/*
-	 * If the memory came from a previously allocated node, we must
-	 * retry with the bootmem allocator.
+	 * We initialize the nodes in numeric order: 0, 1, 2...
+	 * and hand over control from the LMB allocator to the
+	 * bootmem allocator.  If this function is called for
+	 * node 5, then we know that all nodes <5 are using the
+	 * bootmem allocator instead of the LMB allocator.
+	 *
+	 * So, check the nid from which this allocation came
+	 * and double check to see if we need to use bootmem
+	 * instead of the LMB.  We don't free the LMB memory
+	 * since it would be useless.
 	 */
 	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
 	if (new_nid < nid) {
-- cgit v1.2.3-70-g09d2

From 5d21ea2b0e1d9d5d880670dbb9a96efe9b419583 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 9 Dec 2008 08:21:33 +0000
Subject: powerpc/mm: Cleanup careful_allocation(): bootmem already panics

If we fail a bootmem allocation, the bootmem code itself panics.  No
need to redo it here.

Also change the wording of the other panic.  We don't strictly have
to allocate memory on the specified node.  It is just a hint and that
node may not even *have* any memory on it.  In that case we can and
do fall back to other nodes.

Signed-off-by: Dave Hansen
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch/powerpc/mm/numa.c')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 213664c9cdc..aabf30175eb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -836,7 +836,7 @@ static void __init *careful_allocation(int nid, unsigned long size,
 		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
 	if (!ret)
-		panic("numa.c: cannot allocate %lu bytes on node %d",
+		panic("numa.c: cannot allocate %lu bytes for node %d",
 			size, nid);
 
 	/*
@@ -856,10 +856,6 @@ static void __init *careful_allocation(int nid, unsigned long size,
 		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
 				size, align, 0);
 
-		if (!ret)
-			panic("numa.c: cannot allocate %lu bytes on node %d",
-				size, new_nid);
-
 		ret = __pa(ret);
 
 		dbg("alloc_bootmem %lx %lx\n", ret, size);
-- cgit v1.2.3-70-g09d2
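For orientation before the next patch: after the two changes above, the LMB-to-bootmem fallback at the heart of careful_allocation() reads roughly as sketched below. This is reconstructed from the hunks shown here rather than copied verbatim from the tree. Note that the bootmem allocation carries no error check of its own, because __alloc_bootmem_node() already panics on failure; that is exactly why the second patch could drop the explicit panic.

	/*
	 * Sketch only -- assembled from the hunks above, not verbatim
	 * kernel source.  Nodes are set up in numeric order, so a node
	 * numbered below 'nid' has already handed control to bootmem.
	 * If the LMB allocation landed on such a node, allocate again
	 * from that node's bootmem instead.
	 */
	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
	if (new_nid < nid) {
		/* bootmem panics internally on failure, no NULL check needed */
		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);
		ret = __pa(ret);
		dbg("alloc_bootmem %lx %lx\n", ret, size);
	}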
From 0be210fd664b07531cb238bafb453a2a54c2a7a8 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 9 Dec 2008 08:21:35 +0000
Subject: powerpc/mm: Make careful_allocation() return virtual addrs

Since we memset() the result in both of the uses here, just make
careful_allocation() return a virtual address.

Also, add a separate variable to store the physical address that
comes back from the lmb_alloc() functions.  This makes it less
likely that someone will screw it up by forgetting to convert before
returning, since the vaddr is always in a void* and the paddr is
always in an unsigned long.

I admit this is arbitrary since one of its users needs a paddr and
one a vaddr, but it does remove a good number of casts.

Signed-off-by: Dave Hansen
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc/mm/numa.c')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index aabf30175eb..9ec9939f9fb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -822,23 +822,28 @@ static void __init dump_numa_memory_topology(void)
  * required. nid is the preferred node and end is the physical address of
  * the highest address in the node.
  *
- * Returns the physical address of the memory.
+ * Returns the virtual address of the memory.
  */
 static void __init *careful_allocation(int nid, unsigned long size,
 				       unsigned long align,
 				       unsigned long end_pfn)
 {
+	void *ret;
 	int new_nid;
-	unsigned long ret = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+	unsigned long ret_paddr;
+
+	ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
 
 	/* retry over all memory */
-	if (!ret)
-		ret = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
+	if (!ret_paddr)
+		ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());
 
-	if (!ret)
+	if (!ret_paddr)
 		panic("numa.c: cannot allocate %lu bytes for node %d",
 			size, nid);
 
+	ret = __va(ret_paddr);
+
 	/*
 	 * We initialize the nodes in numeric order: 0, 1, 2...
 	 * and hand over control from the LMB allocator to the
@@ -851,17 +856,15 @@ static void __init *careful_allocation(int nid, unsigned long size,
 	 * instead of the LMB.  We don't free the LMB memory
 	 * since it would be useless.
 	 */
-	new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
+	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
 	if (new_nid < nid) {
-		ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
+		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
 				size, align, 0);
 
-		ret = __pa(ret);
-
-		dbg("alloc_bootmem %lx %lx\n", ret, size);
+		dbg("alloc_bootmem %p %lx\n", ret, size);
 	}
 
-	return (void *)ret;
+	return ret;
 }
 
 static struct notifier_block __cpuinitdata ppc64_numa_nb = {
@@ -956,7 +959,7 @@ void __init do_init_bootmem(void)
 
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
-		unsigned long bootmem_paddr;
+		void *bootmem_vaddr;
 		unsigned long bootmap_pages;
 
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
@@ -971,7 +974,6 @@ void __init do_init_bootmem(void)
 		NODE_DATA(nid) = careful_allocation(nid,
 					sizeof(struct pglist_data),
 					SMP_CACHE_BYTES, end_pfn);
-		NODE_DATA(nid) = __va(NODE_DATA(nid));
 		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
 		dbg("node %d\n", nid);
@@ -988,14 +990,15 @@ void __init do_init_bootmem(void)
 		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_paddr = (unsigned long)careful_allocation(nid,
+		bootmem_vaddr = careful_allocation(nid,
 					bootmap_pages << PAGE_SHIFT,
 					PAGE_SIZE, end_pfn);
-		memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
+		memset(bootmem_vaddr, 0, bootmap_pages << PAGE_SHIFT);
 
-		dbg("bootmap_paddr = %lx\n", bootmem_paddr);
+		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
 
-		init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+		init_bootmem_node(NODE_DATA(nid),
+				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
 				  start_pfn, end_pfn);
 
 		free_bootmem_with_active_regions(nid, end_pfn);
-- cgit v1.2.3-70-g09d2
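The vaddr/paddr split described above is easy to show in isolation. The helper below is purely hypothetical (alloc_and_map() and its parameters are not part of the patch); it only illustrates the convention being adopted: physical addresses stay in unsigned long, virtual addresses in void *, and __va() is applied exactly once at the boundary, so a forgotten conversion shows up as a type mismatch rather than a silently bogus pointer.

/* Hypothetical helper, for illustration only -- not part of the patch. */
static void __init *alloc_and_map(unsigned long size, unsigned long align,
				  unsigned long limit_paddr)
{
	unsigned long paddr;	/* what the LMB allocator hands back */
	void *vaddr;		/* what callers actually dereference */

	paddr = __lmb_alloc_base(size, align, limit_paddr);
	if (!paddr)
		return NULL;

	vaddr = __va(paddr);	/* convert once, at the boundary */
	return vaddr;
}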
From 893473df78b4407c9ab75cb55479409795953b01 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 9 Dec 2008 08:21:36 +0000
Subject: powerpc/mm: Cleanup careful_allocation(): consolidate memset()

Both users of careful_allocation() immediately memset() the result.
So, just do it in one place.

Also give careful_allocation() a 'z' in its name to bring it in line
with kzalloc() and friends.

Signed-off-by: Dave Hansen
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/numa.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/mm/numa.c')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9ec9939f9fb..7393bd76d69 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -824,7 +824,7 @@ static void __init dump_numa_memory_topology(void)
  *
  * Returns the virtual address of the memory.
  */
-static void __init *careful_allocation(int nid, unsigned long size,
+static void __init *careful_zallocation(int nid, unsigned long size,
 				       unsigned long align,
 				       unsigned long end_pfn)
 {
@@ -864,6 +864,7 @@ static void __init *careful_allocation(int nid, unsigned long size,
 		dbg("alloc_bootmem %p %lx\n", ret, size);
 	}
 
+	memset(ret, 0, size);
 	return ret;
 }
 
@@ -971,10 +972,9 @@ void __init do_init_bootmem(void)
 		 * previous nodes' bootmem to be initialized and have
 		 * all reserved areas marked.
 		 */
-		NODE_DATA(nid) = careful_allocation(nid,
+		NODE_DATA(nid) = careful_zallocation(nid,
 					sizeof(struct pglist_data),
 					SMP_CACHE_BYTES, end_pfn);
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
 		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
@@ -990,10 +990,9 @@ void __init do_init_bootmem(void)
 		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
 
 		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-		bootmem_vaddr = careful_allocation(nid,
+		bootmem_vaddr = careful_zallocation(nid,
 					bootmap_pages << PAGE_SHIFT,
 					PAGE_SIZE, end_pfn);
-		memset(bootmem_vaddr, 0, bootmap_pages << PAGE_SHIFT);
 
 		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
 
@@ -1004,7 +1003,7 @@ void __init do_init_bootmem(void)
 		free_bootmem_with_active_regions(nid, end_pfn);
 		/*
 		 * Be very careful about moving this around.  Future
-		 * calls to careful_allocation() depend on this getting
+		 * calls to careful_zallocation() depend on this getting
 		 * done correctly.
 		 */
 		mark_reserved_regions_for_nid(nid);
-- cgit v1.2.3-70-g09d2
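Taken together, the series leaves the allocator looking roughly like the sketch below. It is assembled from the hunks above, not copied from the tree, so whitespace and surrounding code may differ slightly; the end state is a zeroing, virtual-address-returning helper that prefers LMB memory near the requested node and falls back to an already-initialized node's bootmem when the memory turns out to live there.

/* Approximate end state -- reconstructed from the hunks above, not verbatim. */
static void __init *careful_zallocation(int nid, unsigned long size,
					unsigned long align,
					unsigned long end_pfn)
{
	void *ret;
	int new_nid;
	unsigned long ret_paddr;

	ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret_paddr)
		ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM());

	if (!ret_paddr)
		panic("numa.c: cannot allocate %lu bytes for node %d",
			size, nid);

	ret = __va(ret_paddr);

	/*
	 * Nodes come up in numeric order, so any node below 'nid' has
	 * already switched from the LMB allocator to bootmem.  If the
	 * LMB allocation landed on such a node, allocate again from its
	 * bootmem; __alloc_bootmem_node() panics on failure by itself.
	 */
	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);
		dbg("alloc_bootmem %p %lx\n", ret, size);
	}

	/* both callers used to memset() the result; do it here instead */
	memset(ret, 0, size);
	return ret;
}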