From 7b16bbf97375d9fb7fc107b3f80afeb94a204e44 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 18 Oct 2012 14:33:23 +0800 Subject: Revert "x86/mm: Fix the size calculation of mapping tables" Commit: 722bc6b16771 x86/mm: Fix the size calculation of mapping tables Tried to address the issue that the first 2/4M should use 4k pages if PSE enabled, but extra counts should only be valid for x86_32. This commit caused a kdump regression: the kdump kernel hangs. Work is in progress to fundamentally fix the various page table initialization issues that we have, via the design suggested by H. Peter Anvin, but it's not ready yet to be merged. So, to get a working kdump revert to the last known working version, which is the revert of this commit and of a followup fix (which was incomplete): bd2753b2dda7 x86/mm: Only add extra pages count for the first memory range during pre-allocation Tested kdump on physical and virtual machines. Signed-off-by: Dave Young Acked-by: Yinghai Lu Acked-by: Cong Wang Acked-by: Flavio Leitner Tested-by: Flavio Leitner Cc: Dan Carpenter Cc: Cong Wang Cc: Flavio Leitner Cc: Tejun Heo Cc: ianfang.cn@gmail.com Cc: Vivek Goyal Cc: Linus Torvalds Cc: Andrew Morton Cc: Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ab1f6a93b52..8653b3a722b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -29,14 +29,8 @@ int direct_gbpages #endif ; -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - -static void __init find_early_table_space(struct map_range *mr, unsigned long end, - int use_pse, int use_gbpages) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) { unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; phys_addr_t base; @@ -61,10 +55,6 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en #ifdef CONFIG_X86_32 extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. */ - if (mr->start < PMD_SIZE) - extra += mr->end - mr->start; - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -95,6 +85,12 @@ void __init native_pagetable_reserve(u64 start, u64 end) memblock_reserve(start, end - start); } +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -267,7 +263,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(&mr[0], end, use_pse, use_gbpages); + find_early_table_space(end, use_pse, use_gbpages); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, -- cgit v1.2.3-70-g09d2 From 844ab6f993b1d32eb40512503d35ff6ad0c57030 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Wed, 24 Oct 2012 14:24:44 -0500 Subject: x86, mm: Find_early_table_space based on ranges that are actually being mapped Current logic finds enough space for direct mapping page tables from 0 to end. Instead, we only need to find enough space to cover mr[0].start to mr[nr_range].end -- the range that is actually being mapped by init_memory_mapping() This is needed after 1bbbbe779aabe1f0768c2bf8f8c0a5583679b54a, to address the panic reported here: https://lkml.org/lkml/2012/10/20/160 https://lkml.org/lkml/2012/10/21/157 Signed-off-by: Jacob Shin Link: http://lkml.kernel.org/r/20121024195311.GB11779@jshin-Toonie Tested-by: Tom Rini Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 70 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 29 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8653b3a722b..bc287d62bf1 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -29,36 +29,54 @@ int direct_gbpages #endif ; -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - if (use_gbpages) { - unsigned long extra; + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* The first 2/4M doesn't use large pages. */ + if (mr[i].start < PMD_SIZE) + extra += range; + + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -76,7 +94,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - end - 1, pgt_buf_start << PAGE_SHIFT, + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); } @@ -85,12 +103,6 @@ void __init native_pagetable_reserve(u64 start, u64 end) memblock_reserve(start, end - start); } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -263,7 +275,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, -- cgit v1.2.3-70-g09d2 From f82f64dd9f485e13f29f369772d4a0e868e5633a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 25 Oct 2012 15:45:26 -0700 Subject: x86, mm: Undo incorrect revert in arch/x86/mm/init.c Commit 844ab6f9 x86, mm: Find_early_table_space based on ranges that are actually being mapped added back some lines back wrongly that has been removed in commit 7b16bbf97 Revert "x86/mm: Fix the size calculation of mapping tables" remove them again. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQW_vuaYQbmagVnxT2DGsYc=9tNeAbdBq53sYkitPOwxSQ@mail.gmail.com Acked-by: Jacob Shin Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/mm/init.c') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bc287d62bf1..d7aea41563b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -65,10 +65,6 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range) #ifdef CONFIG_X86_32 extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. */ - if (mr[i].start < PMD_SIZE) - extra += range; - ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else { ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; -- cgit v1.2.3-70-g09d2