From ca57df79d4f64e1a4886606af4289d40636189c5 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Tue, 31 Jul 2012 16:43:19 -0700 Subject: mm: setup pageblock_order before it's used by sparsemem On architectures with CONFIG_HUGETLB_PAGE_SIZE_VARIABLE set, such as Itanium, pageblock_order is a variable with a default value of 0. It's set to the right value by set_pageblock_order() in function free_area_init_core(). But pageblock_order may be used by sparse_init() before free_area_init_core() is called along the path: sparse_init() ->sparse_early_usemaps_alloc_node() ->usemap_size() ->SECTION_BLOCKFLAGS_BITS ->((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) The uninitialized pageblock_order wastes memory because usemap_size() returns a much bigger value than is really needed. For example, on an Itanium platform, sparse_init() pageblock_order=0 usemap_size=24576 free_area_init_core() before pageblock_order=0, usemap_size=24576 free_area_init_core() after pageblock_order=12, usemap_size=8 That means 24K of memory is wasted for each section, so fix it by calling set_pageblock_order() from sparse_init(). 
Signed-off-by: Xishi Qiu Signed-off-by: Jiang Liu Cc: Tony Luck Cc: Yinghai Lu Cc: KAMEZAWA Hiroyuki Cc: Benjamin Herrenschmidt Cc: KOSAKI Motohiro Cc: David Rientjes Cc: Keping Chen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index c7bb952400c..950981fd07c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -493,6 +493,9 @@ void __init sparse_init(void) struct page **map_map; #endif + /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ + set_pageblock_order(); + /* * map is using big page (aka 2M in x86 64 bit) * usemap is less one page (aka 24 bytes) -- cgit v1.2.3-70-g09d2 From 5b760e64a64c8940cdccd0ba6fce19a9bd010d20 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 31 Jul 2012 16:46:02 -0700 Subject: mm/sparse: optimize sparse_index_alloc With CONFIG_SPARSEMEM_EXTREME, the two levels of memory section descriptors are allocated from slab or bootmem. When allocating from slab, let slab/bootmem allocator clear the memory chunk. We needn't clear it explicitly. 
Signed-off-by: Gavin Shan Reviewed-by: Michal Hocko Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 950981fd07c..fa933f43b2c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -65,14 +65,12 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid) if (slab_is_available()) { if (node_state(nid, N_HIGH_MEMORY)) - section = kmalloc_node(array_size, GFP_KERNEL, nid); + section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = kmalloc(array_size, GFP_KERNEL); - } else + section = kzalloc(array_size, GFP_KERNEL); + } else { section = alloc_bootmem_node(NODE_DATA(nid), array_size); - - if (section) - memset(section, 0, array_size); + } return section; } -- cgit v1.2.3-70-g09d2 From db36a46113e101a8aa2d6ede41e78f2eaabed3f1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 31 Jul 2012 16:46:04 -0700 Subject: mm/sparse: more checks on mem_section number __section_nr() was implemented to retrieve the corresponding memory section number according to its descriptor. It's possible that the specified memory section descriptor doesn't exist in the global array. So add more checking on that and report an error for a wrong case. 
Signed-off-by: Gavin Shan Acked-by: David Rientjes Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index fa933f43b2c..42ca0ea9af1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -130,6 +130,8 @@ int __section_nr(struct mem_section* ms) break; } + VM_BUG_ON(root_nr == NR_SECTION_ROOTS); + return (root_nr * SECTIONS_PER_ROOT) + (ms - root); } -- cgit v1.2.3-70-g09d2 From c1c9518331969f97ea403bac66f0fd4a85d204d5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 31 Jul 2012 16:46:06 -0700 Subject: mm/sparse: remove index_init_lock sparse_index_init() uses the index_init_lock spinlock to protect root mem_section assignment. The lock is not necessary anymore because the function is called only during boot (during paging init which is executed only from a single CPU) and from the hotplug code (by add_memory() via arch_add_memory()) which uses mem_hotplug_mutex. The lock was introduced by 28ae55c9 ("sparsemem extreme: hotplug preparation") and sparse_index_init() was used only during boot at that time. Later when the hotplug code (and add_memory()) was introduced there was no synchronization so it was possible to online more sections from the same root probably (though I am not 100% sure about that). The first synchronization has been added by 6ad696d2 ("mm: allow memory hotplug and hibernation in the same kernel") which was later replaced by the mem_hotplug_mutex - 20d6c96b ("mem-hotplug: introduce {un}lock_memory_hotplug()"). Let's remove the lock as it is not needed and it makes the code more confusing. 
[mhocko@suse.cz: changelog] Signed-off-by: Gavin Shan Reviewed-by: Michal Hocko Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 42ca0ea9af1..fac95f2888f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -77,7 +77,6 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid) static int __meminit sparse_index_init(unsigned long section_nr, int nid) { - static DEFINE_SPINLOCK(index_init_lock); unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; int ret = 0; @@ -88,20 +87,9 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) section = sparse_index_alloc(nid); if (!section) return -ENOMEM; - /* - * This lock keeps two different sections from - * reallocating for the same index - */ - spin_lock(&index_init_lock); - - if (mem_section[root]) { - ret = -EEXIST; - goto out; - } mem_section[root] = section; -out: - spin_unlock(&index_init_lock); + return ret; } #else /* !SPARSEMEM_EXTREME */ -- cgit v1.2.3-70-g09d2