From 07b4e2bc9c35ea88cbd36d806fcd5e3bcbf022be Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:51 -0700 Subject: mm: sparse: fix section usemap placement calculation Commit 238305bb4d41 ("mm: remove sparsemem allocation details from the bootmem allocator") introduced a bug in the allocation goal calculation that put section usemaps not in the same section as the node descriptors, creating unnecessary hotplug dependencies between them: node 0 must be removed before remove section 16399 node 1 must be removed before remove section 16399 node 2 must be removed before remove section 16399 node 3 must be removed before remove section 16399 node 4 must be removed before remove section 16399 node 5 must be removed before remove section 16399 node 6 must be removed before remove section 16399 The reason is that it applies PAGE_SECTION_MASK to the physical address of the node descriptor when finding a suitable place to put the usemap, when this mask is actually intended to be used with PFNs. Because the PFN mask is wider, the target address will point beyond the wanted section holding the node descriptor and the node must be offlined before the section holding the usemap can go. Fix this by extending the mask to address width before use. Signed-off-by: Yinghai Lu Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 6a4bf9160e8..e861397016a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -287,7 +287,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. */ - goal = __pa(pgdat) & PAGE_SECTION_MASK; + goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT)); return __alloc_bootmem_node_nopanic(host_pgdat, size, SMP_CACHE_BYTES, goal); -- cgit v1.2.3-70-g09d2 From 99ab7b19440a72ebdf225f99b20f8ef40decee86 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:53 -0700 Subject: mm: sparse: fix usemap allocation above node descriptor section After commit f5bf18fa22f8 ("bootmem/sparsemem: remove limit constraint in alloc_bootmem_section"), usemap allocations may easily be placed outside the optimal section that holds the node descriptor, even if there is space available in that section. This results in unnecessary hotplug dependencies that need to have the node unplugged before the section holding the usemap. The reason is that the bootmem allocator doesn't guarantee a linear search starting from the passed allocation goal but may start out at a much higher address absent an upper limit. Fix this by trying the allocation with the limit at the section end, then retry without if that fails. This keeps the fix from f5bf18fa22f8 of not panicking if the allocation does not fit in the section, but still makes sure to try to stay within the section at first. Signed-off-by: Yinghai Lu Signed-off-by: Johannes Weiner Cc: [3.3.x, 3.4.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 5 +++++ mm/bootmem.c | 2 +- mm/nobootmem.c | 2 +- mm/sparse.c | 18 +++++++++++++----- 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'mm/sparse.c') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 324fe08ea3b..6d6795d46a7 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal); diff --git a/mm/bootmem.c b/mm/bootmem.c index ec4fcb7a56c..73096630cb3 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, +void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { diff --git a/mm/nobootmem.c b/mm/nobootmem.c index d23415c001b..0900b3910cd 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -274,7 +274,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, +void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, diff --git a/mm/sparse.c b/mm/sparse.c index e861397016a..c7bb952400c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -275,8 +275,9 @@ static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { - pg_data_t *host_pgdat; - unsigned long goal; + unsigned long goal, limit; + unsigned long *p; + int nid; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while @@ -288,9 +289,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * this problem. */ goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); - host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT)); - return __alloc_bootmem_node_nopanic(host_pgdat, size, - SMP_CACHE_BYTES, goal); + limit = goal + (1UL << PA_SECTION_SHIFT); + nid = early_pfn_to_nid(goal >> PAGE_SHIFT); +again: + p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, + SMP_CACHE_BYTES, goal, limit); + if (!p && limit) { + limit = 0; + goto again; + } + return p; } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) -- cgit v1.2.3-70-g09d2