From fb74fb6db91abc3c1ceeb9d2c17b44866a12c63e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:15 -0700 Subject: x86, memblock: Add memblock_x86_find_in_range_size() size is returned according free range. Will be used to find free ranges for early_memtest and memory corruption check Do not mess it up with lib/memblock.c yet. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 8 ++++ arch/x86/mm/Makefile | 2 + arch/x86/mm/memblock.c | 87 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 arch/x86/include/asm/memblock.h create mode 100644 arch/x86/mm/memblock.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h new file mode 100644 index 00000000000..c14219a2616 --- /dev/null +++ b/arch/x86/include/asm/memblock.h @@ -0,0 +1,8 @@ +#ifndef _X86_MEMBLOCK_H +#define _X86_MEMBLOCK_H + +#define ARCH_DISCARD_MEMBLOCK + +u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); + +#endif diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index a4c768397ba..55543397a8a 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -26,4 +26,6 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o + obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c new file mode 100644 index 00000000000..26ba46234cb --- /dev/null +++ b/arch/x86/mm/memblock.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Check for already reserved areas */ +static inline bool __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + struct memblock_region *r; + u64 addr = *addrp, last; + u64 size = *sizep; + bool changed = false; + +again: + last = addr + size; + for_each_memblock(reserved, r) { + if (last > 
r->base && addr < r->base) { + size = r->base - addr; + changed = true; + goto again; + } + if (last > (r->base + r->size) && addr < (r->base + r->size)) { + addr = round_up(r->base + r->size, align); + size = last - addr; + changed = true; + goto again; + } + if (last <= (r->base + r->size) && addr >= r->base) { + (*sizep)++; + return false; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +static u64 __init __memblock_x86_find_in_range_size(u64 ei_start, u64 ei_last, u64 start, + u64 *sizep, u64 align) +{ + u64 addr, last; + + addr = round_up(ei_start, align); + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + goto out; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + goto out; + + return addr; + +out: + return MEMBLOCK_ERROR; +} + +/* + * Find next free range after start, and size is returned in *sizep + */ +u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) +{ + struct memblock_region *r; + + for_each_memblock(memory, r) { + u64 ei_start = r->base; + u64 ei_last = ei_start + r->size; + u64 addr; + + addr = __memblock_x86_find_in_range_size(ei_start, ei_last, start, + sizep, align); + + if (addr != MEMBLOCK_ERROR) + return addr; + } + + return MEMBLOCK_ERROR; +} -- cgit v1.2.3-70-g09d2 From f88eff74aa848e58b1ea49768c0bbb874b31357f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:15 -0700 Subject: bootmem, x86: Add weak version of reserve_bootmem_generic It will be used memblock_x86_to_bootmem converting It is an wrapper for reserve_bootmem, and x86 64bit is using special one. Also clean up that version for x86_64. We don't need to take care of numa path for that, bootmem can handle it how Signed-off-by: Yinghai Lu Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/init_32.c | 6 ------ arch/x86/mm/init_64.c | 20 ++------------------ mm/bootmem.c | 6 ++++++ 3 files changed, 8 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bca79091b9d..90e054589aa 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1069,9 +1069,3 @@ void mark_rodata_ro(void) #endif } #endif - -int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, - int flags) -{ - return reserve_bootmem(phys, len, flags); -} diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee41bba315d..634fa0884a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -799,13 +799,10 @@ void mark_rodata_ro(void) #endif +#ifndef CONFIG_NO_BOOTMEM int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, int flags) { -#ifdef CONFIG_NUMA - int nid, next_nid; - int ret; -#endif unsigned long pfn = phys >> PAGE_SHIFT; if (pfn >= max_pfn) { @@ -821,21 +818,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, return -EFAULT; } - /* Should check here against the e820 map to avoid double free */ -#ifdef CONFIG_NUMA - nid = phys_to_nid(phys); - next_nid = phys_to_nid(phys + len - 1); - if (nid == next_nid) - ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); - else - ret = reserve_bootmem(phys, len, flags); - - if (ret != 0) - return ret; - -#else reserve_bootmem(phys, len, flags); -#endif if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { dma_reserve += len / PAGE_SIZE; @@ -844,6 +827,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, return 0; } +#endif int kern_addr_valid(unsigned long addr) { diff --git a/mm/bootmem.c b/mm/bootmem.c index 142c84a5499..bde170dd2fd 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -526,6 +526,12 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, } #ifndef CONFIG_NO_BOOTMEM +int __weak __init 
reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) +{ + return reserve_bootmem(phys, len, flags); +} + static unsigned long __init align_idx(struct bootmem_data *bdata, unsigned long idx, unsigned long step) { -- cgit v1.2.3-70-g09d2 From 27de794365786b4cdc3461ed4e23af2a33f40612 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:15 -0700 Subject: x86, memblock: Add memblock_x86_to_bootmem() memblock_x86_to_bootmem() will reserve memblock.reserved.region in bootmem after bootmem is set up. We can use it with all arches that support memblock later. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 1 + arch/x86/mm/memblock.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index c14219a2616..69cf853e931 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -4,5 +4,6 @@ #define ARCH_DISCARD_MEMBLOCK u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); +void memblock_x86_to_bootmem(u64 start, u64 end); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 26ba46234cb..8101084d452 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -85,3 +85,32 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) return MEMBLOCK_ERROR; } + +#ifndef CONFIG_NO_BOOTMEM +void __init memblock_x86_to_bootmem(u64 start, u64 end) +{ + int count; + u64 final_start, final_end; + struct memblock_region *r; + + /* Take out region array itself */ + memblock_free_reserved_regions(); + + count = memblock.reserved.cnt; + pr_info("(%d early reservations) ==> bootmem [%010llx-%010llx]\n", count, start, end - 1); + for_each_memblock(reserved, r) { + pr_info(" [%010llx-%010llx] ", (u64)r->base, (u64)r->base + r->size - 1); + final_start = max(start, r->base); + final_end = min(end, 
r->base + r->size); + if (final_start >= final_end) { + pr_cont("\n"); + continue; + } + pr_cont(" ==> [%010llx-%010llx]\n", final_start, final_end - 1); + reserve_bootmem_generic(final_start, final_end - final_start, BOOTMEM_DEFAULT); + } + + /* Put region array back ? */ + memblock_reserve_reserved_regions(); +} +#endif -- cgit v1.2.3-70-g09d2 From 9dc5d569c133819c1ce069ebb1d771c62de32580 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:15 -0700 Subject: x86, memblock: Add memblock_x86_reserve_range/memblock_x86_free_range They are wrappers for core versions, which take start/end/name instead of base/size. This will make the x86 conversion easier. We could add more debug printout later. -v2: change get_max_mapped() to memblock.default_alloc_limit according to Michael Ellerman and Ben change to memblock_x86_reserve_range and memblock_x86_free_range according to Michael Ellerman -v3: call check_and_double after reserve/free, so we can avoid using find_memblock_area. Suggested by Michael Ellerman Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/memblock.h | 3 +++ arch/x86/mm/memblock.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 69cf853e931..e11ddf059fa 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -6,4 +6,7 @@ u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); void memblock_x86_to_bootmem(u64 start, u64 end); +void memblock_x86_reserve_range(u64 start, u64 end, char *name); +void memblock_x86_free_range(u64 start, u64 end); + #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 8101084d452..9829eaf1dbd 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -114,3 +114,25 @@ void __init memblock_x86_to_bootmem(u64 start, u64 end) memblock_reserve_reserved_regions(); } #endif + +void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) +{ + if (start == end) + return; + + if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx]\n", start, end)) + return; + + memblock_reserve(start, end - start); +} + +void __init memblock_x86_free_range(u64 start, u64 end) +{ + if (start == end) + return; + + if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx]\n", start, end)) + return; + + memblock_free(start, end - start); +} -- cgit v1.2.3-70-g09d2 From 4d5cf86ce187c0d3a4cdf233ab0cc6526ccbe01f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: x86, memblock: Add get_free_all_memory_range() get_free_all_memory_range is for CONFIG_NO_BOOTMEM=y, and will be called by free_all_memory_core_early(). It will use early_node_map aka active ranges subtract memblock.reserved to get all free range, and those ranges will convert to slab pages. -v4: increase range size Signed-off-by: Yinghai Lu Cc: Jan Beulich Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/memblock.h | 2 + arch/x86/mm/memblock.c | 98 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index e11ddf059fa..72639ce65e8 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -8,5 +8,7 @@ void memblock_x86_to_bootmem(u64 start, u64 end); void memblock_x86_reserve_range(u64 start, u64 end, char *name); void memblock_x86_free_range(u64 start, u64 end); +struct range; +int get_free_all_memory_range(struct range **rangep, int nodeid); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 9829eaf1dbd..b4500604ab3 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -86,7 +86,103 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) return MEMBLOCK_ERROR; } -#ifndef CONFIG_NO_BOOTMEM +static __init struct range *find_range_array(int count) +{ + u64 end, size, mem; + struct range *range; + + size = sizeof(struct range) * count; + end = memblock.current_limit; + + mem = memblock_find_in_range(0, end, size, sizeof(struct range)); + if (mem == MEMBLOCK_ERROR) + panic("can not find more space for range array"); + + /* + * This range is tempoaray, so don't reserve it, it will not be + * overlapped because We will not alloccate new buffer before + * We discard this one + */ + range = __va(mem); + memset(range, 0, size); + + return range; +} + +#ifdef CONFIG_NO_BOOTMEM +static void __init memblock_x86_subtract_reserved(struct range *range, int az) +{ + u64 final_start, final_end; + struct memblock_region *r; + + /* Take out region array itself at first*/ + memblock_free_reserved_regions(); + + pr_info("Subtract (%ld early reservations)\n", memblock.reserved.cnt); + + for_each_memblock(reserved, r) { + pr_info(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); + final_start = 
PFN_DOWN(r->base); + final_end = PFN_UP(r->base + r->size); + if (final_start >= final_end) + continue; + subtract_range(range, az, final_start, final_end); + } + + /* Put region array back ? */ + memblock_reserve_reserved_regions(); +} + +struct count_data { + int nr; +}; + +static int __init count_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + struct count_data *data = datax; + + data->nr++; + + return 0; +} + +static int __init count_early_node_map(int nodeid) +{ + struct count_data data; + + data.nr = 0; + work_with_active_regions(nodeid, count_work_fn, &data); + + return data.nr; +} + +int __init get_free_all_memory_range(struct range **rangep, int nodeid) +{ + int count; + struct range *range; + int nr_range; + + count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2; + + range = find_range_array(count); + nr_range = 0; + + /* + * Use early_node_map[] and memblock.reserved.region to get range array + * at first + */ + nr_range = add_from_early_node_map(range, count, nr_range, nodeid); +#ifdef CONFIG_X86_32 + subtract_range(range, count, max_low_pfn, -1ULL); +#endif + memblock_x86_subtract_reserved(range, count); + nr_range = clean_sort_range(range, count); + + *rangep = range; + return nr_range; +} +#else void __init memblock_x86_to_bootmem(u64 start, u64 end) { int count; -- cgit v1.2.3-70-g09d2 From 88ba088c18457caaf8d2e5f8d36becc731a3d4f6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: x86, memblock: Add memblock_x86_register_active_regions() and memblock_x86_hole_size() memblock_x86_register_active_regions() will be used to fill early_node_map, the result will be memblock.memory.region AND numa data memblock_x86_hole_size will be used to find hole size on memblock.memory.region with specified range. Signed-off-by: Yinghai Lu Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/memblock.h | 4 +++ arch/x86/mm/memblock.c | 66 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 72639ce65e8..16af28d3607 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -11,4 +11,8 @@ void memblock_x86_free_range(u64 start, u64 end); struct range; int get_free_all_memory_range(struct range **rangep, int nodeid); +void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn); +u64 memblock_x86_hole_size(u64 start, u64 end); + #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index b4500604ab3..53a7a5aebd6 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -232,3 +232,69 @@ void __init memblock_x86_free_range(u64 start, u64 end) memblock_free(start, end - start); } + +/* + * Finds an active region in the address range from start_pfn to last_pfn and + * returns its range in ei_startpfn and ei_endpfn for the memblock entry. 
+ */ +static int __init memblock_x86_find_active_region(const struct memblock_region *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + return 1; +} + +/* Walk the memblock.memory map and register active regions within a node */ +void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + struct memblock_region *r; + + for_each_memblock(memory, r) + if (memblock_x86_find_active_region(r, start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. 
+ * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init memblock_x86_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + struct memblock_region *r; + + for_each_memblock(memory, r) + if (memblock_x86_find_active_region(r, start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + + return end - start - ((u64)ram << PAGE_SHIFT); +} -- cgit v1.2.3-70-g09d2 From 6bcc8176d07f108da3b1af17fb2c0e82c80e948e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: x86, memblock: Add memblock_x86_find_in_range_node() It can be used to find NODE_DATA for numa. Need to make sure early_node_map[] is filled before it is called, otherwise it will fallback to memblock_find_in_range(), with node range. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 1 + arch/x86/mm/memblock.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 16af28d3607..3a86b10380f 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -14,5 +14,6 @@ int get_free_all_memory_range(struct range **rangep, int nodeid); void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, unsigned long last_pfn); u64 memblock_x86_hole_size(u64 start, u64 end); +u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 53a7a5aebd6..22ff0a39b22 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -233,6 +233,21 @@ void __init memblock_x86_free_range(u64 start, u64 end) memblock_free(start, end - start); } +/* + * Need to call this function after 
memblock_x86_register_active_regions, + * so early_node_map[] is filled already. + */ +u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align) +{ + u64 addr; + addr = find_memory_core_early(nid, size, align, start, end); + if (addr != MEMBLOCK_ERROR) + return addr; + + /* Fallback, should already have start end within node range */ + return memblock_find_in_range(start, end, size, align); +} + /* * Finds an active region in the address range from start_pfn to last_pfn and * returns its range in ei_startpfn and ei_endpfn for the memblock entry. -- cgit v1.2.3-70-g09d2 From b52c17ce854125700c4e19d4427d39bf2504ff63 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:16 -0700 Subject: x86, memblock: Add memblock_x86_free_memory_in_range() It will return free memory size in specified range. We can not use memory_size - reserved_size here, because some reserved area may not be in the scope of memblock.memory.region. Use memblock.memory.region subtracting memblock.reserved.region to get free range array. then count size of all free ranges. -v2: Ben insist on using _in_range Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/memblock.h | 1 + arch/x86/mm/memblock.c | 48 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 3a86b10380f..fc3c230812e 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -15,5 +15,6 @@ void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, unsigned long last_pfn); u64 memblock_x86_hole_size(u64 start, u64 end); u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); +u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 22ff0a39b22..30d60cf29ce 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -211,6 +211,54 @@ void __init memblock_x86_to_bootmem(u64 start, u64 end) } #endif +u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) +{ + int i, count; + struct range *range; + int nr_range; + u64 final_start, final_end; + u64 free_size; + struct memblock_region *r; + + count = (memblock.reserved.cnt + memblock.memory.cnt) * 2; + + range = find_range_array(count); + nr_range = 0; + + addr = PFN_UP(addr); + limit = PFN_DOWN(limit); + + for_each_memblock(memory, r) { + final_start = PFN_UP(r->base); + final_end = PFN_DOWN(r->base + r->size); + if (final_start >= final_end) + continue; + if (final_start >= limit || final_end <= addr) + continue; + + nr_range = add_range(range, count, nr_range, final_start, final_end); + } + subtract_range(range, count, 0, addr); + subtract_range(range, count, limit, -1ULL); + for_each_memblock(reserved, r) { + final_start = PFN_DOWN(r->base); + final_end = PFN_UP(r->base + r->size); + if (final_start >= final_end) + continue; + if (final_start >= limit || final_end <= addr) + continue; + + subtract_range(range, count, final_start, final_end); + } + nr_range = 
clean_sort_range(range, count); + + free_size = 0; + for (i = 0; i < nr_range; i++) + free_size += range[i].end - range[i].start; + + return free_size << PAGE_SHIFT; +} + void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) { if (start == end) -- cgit v1.2.3-70-g09d2 From e82d42be24bd5d75bf6f81045636e6ca95ab55f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:17 -0700 Subject: x86, memblock: Add memblock_x86_memory_in_range() It will return memory size in specified range according to memblock.memory.region Try to share some code with memblock_x86_free_memory_in_range() by passing get_free to __memblock_x86_memory_in_range(). -v2: Ben want _in_range in the name instead of size Signed-off-by: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 1 + arch/x86/mm/memblock.c | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index fc3c230812e..2c304bb6e07 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -16,5 +16,6 @@ void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, u64 memblock_x86_hole_size(u64 start, u64 end); u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); +u64 memblock_x86_memory_in_range(u64 addr, u64 limit); #endif diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 30d60cf29ce..32ddad5dc93 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -211,7 +211,7 @@ void __init memblock_x86_to_bootmem(u64 start, u64 end) } #endif -u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) +static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) { int i, count; struct range *range; @@ -240,6 +240,10 @@ u64 __init 
memblock_x86_free_memory_in_range(u64 addr, u64 limit) } subtract_range(range, count, 0, addr); subtract_range(range, count, limit, -1ULL); + + /* Subtract memblock.reserved.region in range ? */ + if (!get_free) + goto sort_and_count_them; for_each_memblock(reserved, r) { final_start = PFN_DOWN(r->base); final_end = PFN_UP(r->base + r->size); @@ -250,6 +254,8 @@ u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) subtract_range(range, count, final_start, final_end); } + +sort_and_count_them: nr_range = clean_sort_range(range, count); free_size = 0; @@ -259,6 +265,16 @@ u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) return free_size << PAGE_SHIFT; } +u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) +{ + return __memblock_x86_memory_in_range(addr, limit, true); +} + +u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit) +{ + return __memblock_x86_memory_in_range(addr, limit, false); +} + void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) { if (start == end) -- cgit v1.2.3-70-g09d2 From 301ff3e88ef9ff4bdb92f36a3e6170fce4c9dd34 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:17 -0700 Subject: x86, memblock: Use memblock_debug to control debug message print out Also let memblock_x86_reserve_range/memblock_x86_free_range print out the name if memblock=debug is specified; the name will also be printed when reserve_memblock_area/free_memblock_area are called. -v2: according to Ingo, put " if (memblock_debug) " in one place Signed-off-by: Yinghai Lu Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/memblock.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 32ddad5dc93..aaff3932588 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -118,10 +118,10 @@ static void __init memblock_x86_subtract_reserved(struct range *range, int az) /* Take out region array itself at first*/ memblock_free_reserved_regions(); - pr_info("Subtract (%ld early reservations)\n", memblock.reserved.cnt); + memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt); for_each_memblock(reserved, r) { - pr_info(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); + memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); final_start = PFN_DOWN(r->base); final_end = PFN_UP(r->base + r->size); if (final_start >= final_end) @@ -193,16 +193,16 @@ void __init memblock_x86_to_bootmem(u64 start, u64 end) memblock_free_reserved_regions(); count = memblock.reserved.cnt; - pr_info("(%d early reservations) ==> bootmem [%010llx-%010llx]\n", count, start, end - 1); + memblock_dbg("(%d early reservations) ==> bootmem [%#010llx-%#010llx]\n", count, start, end - 1); for_each_memblock(reserved, r) { - pr_info(" [%010llx-%010llx] ", (u64)r->base, (u64)r->base + r->size - 1); + memblock_dbg(" [%#010llx-%#010llx] ", (u64)r->base, (u64)r->base + r->size - 1); final_start = max(start, r->base); final_end = min(end, r->base + r->size); if (final_start >= final_end) { - pr_cont("\n"); + memblock_dbg("\n"); continue; } - pr_cont(" ==> [%010llx-%010llx]\n", final_start, final_end - 1); + memblock_dbg(" ==> [%#010llx-%#010llx]\n", final_start, final_end - 1); reserve_bootmem_generic(final_start, final_end - final_start, BOOTMEM_DEFAULT); } @@ -280,9 +280,11 @@ void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) if (start == end) return; - if (WARN_ONCE(start > end, 
"memblock_x86_reserve_range: wrong range [%#llx, %#llx]\n", start, end)) + if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end)) return; + memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name); + memblock_reserve(start, end - start); } @@ -291,9 +293,11 @@ void __init memblock_x86_free_range(u64 start, u64 end) if (start == end) return; - if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx]\n", start, end)) + if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end)) return; + memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1); + memblock_free(start, end - start); } -- cgit v1.2.3-70-g09d2 From 72d7c3b33c980843e756681fb4867dc1efd62a76 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:17 -0700 Subject: x86: Use memblock to replace early_res 1. replace find_e820_area with memblock_find_in_range 2. replace reserve_early with memblock_x86_reserve_range 3. replace free_early with memblock_x86_free_range. 4. NO_BOOTMEM will switch to use memblock too. 5. use _e820, _early wrap in the patch, in following patch, will replace them all 6. because memblock_x86_free_range support partial free, we can remove some special care 7. Need to make sure that memblock_find_in_range() is called after memblock_x86_fill() so adjust some calling later in setup.c::setup_arch() -- corruption_check and mptable_update -v2: Move reserve_brk() early Before fill_memblock_area, to avoid overlap between brk and memblock_find_in_range() that could happen We have more then 128 RAM entry in E820 tables, and memblock_x86_fill() could use memblock_find_in_range() to find a new place for memblock.memory.region array. and We don't need to use extend_brk() after fill_memblock_area() So move reserve_brk() early before fill_memblock_area(). 
-v3: Move find_smp_config early To make sure memblock_find_in_range not find wrong place, if BIOS doesn't put mptable in right place. -v4: Treat RESERVED_KERN as RAM in memblock.memory. and they are already in memblock.reserved already.. use __NOT_KEEP_MEMBLOCK to make sure memblock related code could be freed later. -v5: Generic version __memblock_find_in_range() is going from high to low, and for 32bit active_region for 32bit does include high pages need to replace the limit with memblock.default_alloc_limit, aka get_max_mapped() -v6: Use current_limit instead -v7: check with MEMBLOCK_ERROR instead of -1ULL or -1L -v8: Set memblock_can_resize early to handle EFI with more RAM entries -v9: update after kmemleak changes in mainline Suggested-by: David S. Miller Suggested-by: Benjamin Herrenschmidt Suggested-by: Thomas Gleixner Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 9 +-- arch/x86/include/asm/e820.h | 14 ++-- arch/x86/kernel/check.c | 16 +++-- arch/x86/kernel/e820.c | 159 ++++++++++++++--------------------------- arch/x86/kernel/head.c | 3 +- arch/x86/kernel/head32.c | 6 +- arch/x86/kernel/head64.c | 3 + arch/x86/kernel/mpparse.c | 5 +- arch/x86/kernel/setup.c | 46 ++++++++---- arch/x86/kernel/setup_percpu.c | 6 -- arch/x86/mm/numa_64.c | 9 +-- mm/bootmem.c | 3 + mm/page_alloc.c | 50 ++++--------- mm/sparse-vmemmap.c | 11 --- 14 files changed, 141 insertions(+), 199 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcb0593b4a6..542bb2610cb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_MEMBLOCK select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS @@ -195,9 +196,6 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config HAVE_EARLY_RES - def_bool y - config HAVE_INTEL_TXT 
def_bool y depends on EXPERIMENTAL && DMAR && ACPI @@ -590,14 +588,13 @@ config NO_BOOTMEM default y bool "Disable Bootmem code" ---help--- - Use early_res directly instead of bootmem before slab is ready. + Use memblock directly instead of bootmem before slab is ready. - allocator (buddy) [generic] - early allocator (bootmem) [generic] - - very early allocator (reserve_early*()) [x86] + - very early allocator (memblock) [some generic] - very very early allocator (early brk model) [x86] So reduce one layer between early allocator to final allocator - config MEMTEST bool "Memtest" ---help--- diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index ec8a52d14ab..388fed29146 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -117,24 +117,26 @@ extern unsigned long end_user_pfn; extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); -#include extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern int e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn); extern void e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); extern u64 e820_hole_size(u64 start, u64 end); + +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); + +void memblock_x86_fill(void); + extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); +void reserve_early(u64 start, u64 end, char *name); +void free_early(u64 start, u64 end); + /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. 
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index fc999e6fc46..13a38917951 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -2,7 +2,8 @@ #include #include #include -#include +#include + #include /* @@ -18,10 +19,12 @@ static int __read_mostly memory_corruption_check = -1; static unsigned __read_mostly corruption_check_size = 64*1024; static unsigned __read_mostly corruption_check_period = 60; /* seconds */ -static struct e820entry scan_areas[MAX_SCAN_AREAS]; +static struct scan_area { + u64 addr; + u64 size; +} scan_areas[MAX_SCAN_AREAS]; static int num_scan_areas; - static __init int set_corruption_check(char *arg) { char *end; @@ -81,9 +84,9 @@ void __init setup_bios_corruption_check(void) while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { u64 size; - addr = find_e820_area_size(addr, &size, PAGE_SIZE); + addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); - if (!(addr + 1)) + if (addr == MEMBLOCK_ERROR) break; if (addr >= corruption_check_size) @@ -92,7 +95,7 @@ void __init setup_bios_corruption_check(void) if ((addr + size) > corruption_check_size) size = corruption_check_size - addr; - e820_update_range(addr, size, E820_RAM, E820_RESERVED); + memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); scan_areas[num_scan_areas].addr = addr; scan_areas[num_scan_areas].size = size; num_scan_areas++; @@ -105,7 +108,6 @@ void __init setup_bios_corruption_check(void) printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas); - update_e820(); } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0d6fc71bedb..a9221d18a5e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -742,69 +743,29 @@ core_initcall(e820_mark_nvs_memory); */ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) { - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = 
&e820.map[i]; - u64 addr; - u64 ei_start, ei_last; + u64 mem = memblock_find_in_range(start, end, size, align); - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area(ei_start, ei_last, start, end, - size, align); - - if (addr != -1ULL) - return addr; - } - return -1ULL; -} + if (mem == MEMBLOCK_ERROR) + return -1ULL; -u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) -{ - return find_e820_area(start, end, size, align); + return mem; } -u64 __init get_max_mapped(void) -{ - u64 end = max_pfn_mapped; - - end <<= PAGE_SHIFT; - - return end; -} /* * Find next free range after *start */ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) { - int i; + u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area_size(ei_start, ei_last, start, - sizep, align); + if (mem == MEMBLOCK_ERROR) + return -1ULL - if (addr != -1ULL) - return addr; - } - - return -1ULL; + return mem; } /* - * pre allocated 4k and reserved it in e820 + * pre allocated 4k and reserved it in memblock and e820_saved */ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) { @@ -813,8 +774,8 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) u64 start; for (start = startt; ; start += size) { - start = find_e820_area_size(start, &size, align); - if (!(start + 1)) + start = memblock_x86_find_in_range_size(start, &size, align); + if (start == MEMBLOCK_ERROR) return 0; if (size >= sizet) break; @@ -830,10 +791,9 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) addr = round_down(start + size - sizet, align); if (addr < start) return 0; - e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + memblock_x86_reserve_range(addr, 
addr + sizet, "new next"); e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); - printk(KERN_INFO "update e820 for early_reserve_e820\n"); - update_e820(); + printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); update_e820_saved(); return addr; @@ -895,52 +855,12 @@ unsigned long __init e820_end_of_low_ram_pfn(void) { return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* - * Finds an active region in the address range from start_pfn to last_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. - */ -int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - u64 align = PAGE_SIZE; - - *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= last_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > last_pfn) - *ei_endpfn = last_pfn; - - return 1; -} /* Walk the e820 map and register active regions within a node */ void __init e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long last_pfn) { - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); + memblock_x86_register_active_regions(nid, start_pfn, last_pfn); } /* @@ -950,18 +870,16 @@ void __init e820_register_active_regions(int nid, unsigned long start_pfn, */ u64 __init e820_hole_size(u64 start, u64 end) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long last_pfn = end >> 
PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; + return memblock_x86_hole_size(start, end); +} - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - ((u64)ram << PAGE_SHIFT); +void reserve_early(u64 start, u64 end, char *name) +{ + memblock_x86_reserve_range(start, end, name); +} +void free_early(u64 start, u64 end) +{ + memblock_x86_free_range(start, end); } static void early_panic(char *msg) @@ -1210,3 +1128,32 @@ void __init setup_memory_map(void) printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map(who); } + +void __init memblock_x86_fill(void) +{ + int i; + u64 end; + + /* + * EFI may have more than 128 entries + * We are safe to enable resizing, beause memblock_x86_fill() + * is rather later for x86 + */ + memblock_can_resize = 1; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + end = ei->addr + ei->size; + if (end != (resource_size_t)end) + continue; + + if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) + continue; + + memblock_add(ei->addr, ei->size); + } + + memblock_analyze(); + memblock_dump_all(); +} diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 3e66bd364a9..af0699ba48c 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -51,5 +52,5 @@ void __init reserve_ebda_region(void) lowmem = 0x9f000; /* reserve all memory between lowmem and the 1MB mark */ - reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); + memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index b2e24603739..da60aa8a850 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -30,14 +31,15 @@ static 
void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { + memblock_init(); + #ifdef CONFIG_X86_TRAMPOLINE /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, - "EX TRAMPOLINE"); + memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); #endif reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7147143fd61..8ee930fdeeb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -98,6 +99,8 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); + memblock_init(); + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d86dbf7e54b..8252545ae6f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -641,7 +642,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); - reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); + memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); } static int __init smp_scan_config(unsigned long base, unsigned long length) @@ -670,7 +671,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf, (u64)virt_to_phys(mpf)); mem = virt_to_phys(mpf); - reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); + memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); if (mpf->physptr) smp_reserve_memory(mpf); diff 
--git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4ae4acbd03..bbe0aaf7749 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -614,7 +615,7 @@ static __init void reserve_ibft_region(void) addr = find_ibft_region(&size); if (size) - reserve_early_overlap_ok(addr, addr + size, "ibft"); + memblock_x86_reserve_range(addr, addr + size, "* ibft"); } #ifdef CONFIG_X86_RESERVE_LOW_64K @@ -708,6 +709,15 @@ static void __init trim_bios_range(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } +static u64 __init get_max_mapped(void) +{ + u64 end = max_pfn_mapped; + + end <<= PAGE_SHIFT; + + return end; +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -891,8 +901,6 @@ void __init setup_arch(char **cmdline_p) */ max_pfn = e820_end_of_ram_pfn(); - /* preallocate 4k for mptable mpc */ - early_reserve_e820_mpc_new(); /* update e820 for memory not covered by WB MTRRs */ mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) @@ -917,15 +925,6 @@ void __init setup_arch(char **cmdline_p) max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif -#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION - setup_bios_corruption_check(); -#endif - - printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", - max_pfn_mapped< #include #include +#include #include #include #include @@ -171,8 +172,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, if (start < (MAX_DMA32_PFN< (MAX_DMA32_PFN< #include #include +#include #include #include @@ -434,6 +435,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size) { #ifdef CONFIG_NO_BOOTMEM + kmemleak_free_part(__va(physaddr), size); free_early(physaddr, physaddr + size); #else unsigned long start, end; @@ -459,6 +461,7 @@ void __init 
free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, void __init free_bootmem(unsigned long addr, unsigned long size) { #ifdef CONFIG_NO_BOOTMEM + kmemleak_free_part(__va(addr), size); free_early(addr, addr + size); #else unsigned long start, end; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8c9b34674d8..f2cd7450fa7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3667,46 +3667,26 @@ int __init add_from_early_node_map(struct range *range, int az, void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, u64 goal, u64 limit) { - int i; void *ptr; + u64 addr; - if (limit > get_max_mapped()) - limit = get_max_mapped(); + if (limit > memblock.current_limit) + limit = memblock.current_limit; - /* need to go over early_node_map to find out good range for node */ - for_each_active_range_index_in_nid(i, nid) { - u64 addr; - u64 ei_start, ei_last; + addr = find_memory_core_early(nid, size, align, goal, limit); - ei_last = early_node_map[i].end_pfn; - ei_last <<= PAGE_SHIFT; - ei_start = early_node_map[i].start_pfn; - ei_start <<= PAGE_SHIFT; - addr = find_early_area(ei_start, ei_last, - goal, limit, size, align); - - if (addr == -1ULL) - continue; - -#if 0 - printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", - nid, - ei_start, ei_last, goal, limit, size, - align, addr); -#endif - - ptr = phys_to_virt(addr); - memset(ptr, 0, size); - reserve_early_without_check(addr, addr + size, "BOOTMEM"); - /* - * The min_count is set to 0 so that bootmem allocated blocks - * are never reported as leaks. - */ - kmemleak_alloc(ptr, size, 0, 0); - return ptr; - } + if (addr == MEMBLOCK_ERROR) + return NULL; - return NULL; + ptr = phys_to_virt(addr); + memset(ptr, 0, size); + memblock_x86_reserve_range(addr, addr + size, "BOOTMEM"); + /* + * The min_count is set to 0 so that bootmem allocated blocks + * are never reported as leaks. 
+ */ + kmemleak_alloc(ptr, size, 0, 0); + return ptr; } #endif diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index aa33fd67fa4..29d6cbffb28 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -220,18 +220,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (vmemmap_buf_start) { /* need to free left buf */ -#ifdef CONFIG_NO_BOOTMEM - free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); - if (vmemmap_buf_start < vmemmap_buf) { - char name[15]; - - snprintf(name, sizeof(name), "MEMMAP %d", nodeid); - reserve_early_without_check(__pa(vmemmap_buf_start), - __pa(vmemmap_buf), name); - } -#else free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); -#endif vmemmap_buf = NULL; vmemmap_buf_end = NULL; } -- cgit v1.2.3-70-g09d2 From a9ce6bc15100023b411f8117e53a016d61889800 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:17 -0700 Subject: x86, memblock: Replace e820_/_early string with memblock_ 1. Include linux/memblock.h directly, so that references to e820.h can be reduced later. 2. This patch was mainly done by sed scripts. -v2: use MEMBLOCK_ERROR instead of -1ULL or -1UL Signed-off-by: Yinghai Lu Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/efi.h | 2 +- arch/x86/kernel/acpi/sleep.c | 9 +++++---- arch/x86/kernel/apic/numaq_32.c | 3 ++- arch/x86/kernel/efi.c | 5 +++-- arch/x86/kernel/head32.c | 4 ++-- arch/x86/kernel/head64.c | 4 ++-- arch/x86/kernel/setup.c | 29 ++++++++++++++--------------- arch/x86/kernel/trampoline.c | 10 +++++----- arch/x86/mm/init.c | 10 ++++++---- arch/x86/mm/init_32.c | 14 ++++++++------ arch/x86/mm/init_64.c | 11 ++++++----- arch/x86/mm/k8topology_64.c | 4 +++- arch/x86/mm/memtest.c | 7 +++---- arch/x86/mm/numa_32.c | 25 +++++++++++++------------ arch/x86/mm/numa_64.c | 34 +++++++++++++++++----------------- arch/x86/mm/srat_32.c | 3 ++- arch/x86/mm/srat_64.c | 11 ++++++----- arch/x86/xen/mmu.c | 5 +++-- arch/x86/xen/setup.c | 3 ++- mm/bootmem.c | 4 ++-- 20 files changed, 105 insertions(+), 92 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8406ed7f992..8e4a16508d4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,7 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern int add_efi_memmap; -extern void efi_reserve_early(void); +extern void efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fcc3c61fdec..d829e75f968 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -125,7 +126,7 @@ void acpi_restore_state_mem(void) */ void __init acpi_reserve_wakeup_memory(void) { - unsigned long mem; + phys_addr_t mem; if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { printk(KERN_ERR @@ -133,15 +134,15 @@ void __init acpi_reserve_wakeup_memory(void) return; } - mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); + mem = memblock_find_in_range(0, 
1<<20, WAKEUP_SIZE, PAGE_SIZE); - if (mem == -1L) { + if (mem == MEMBLOCK_ERROR) { printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); return; } acpi_realmode = (unsigned long) phys_to_virt(mem); acpi_wakeup_address = mem; - reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); + memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 3e28401f161..960f26ab5c9 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -88,7 +89,7 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd) node_end_pfn[node] = MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - e820_register_active_regions(node, node_start_pfn[node], + memblock_x86_register_active_regions(node, node_start_pfn[node], node_end_pfn[node]); memory_present(node, node_start_pfn[node], node_end_pfn[node]); diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index c2fa9b8b497..0fe27d7c625 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -275,7 +276,7 @@ static void __init do_add_efi_memmap(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } -void __init efi_reserve_early(void) +void __init efi_memblock_x86_reserve_range(void) { unsigned long pmap; @@ -290,7 +291,7 @@ void __init efi_reserve_early(void) boot_params.efi_info.efi_memdesc_size; memmap.desc_version = boot_params.efi_info.efi_memdesc_version; memmap.desc_size = boot_params.efi_info.efi_memdesc_size; - reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, + memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, "EFI memmap"); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index da60aa8a850..74e4cf65043 100644 --- 
a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -42,7 +42,7 @@ void __init i386_start_kernel(void) memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); #endif - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ @@ -51,7 +51,7 @@ void __init i386_start_kernel(void) u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8ee930fdeeb..97adf9828b9 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -101,7 +101,7 @@ void __init x86_64_start_reservations(char *real_mode_data) memblock_init(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ @@ -110,7 +110,7 @@ void __init x86_64_start_reservations(char *real_mode_data) unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbe0aaf7749..a4f01733e87 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -302,7 +302,7 @@ static inline void init_gbpages(void) static void __init reserve_brk(void) { if (_brk_end > _brk_start) - reserve_early(__pa(_brk_start), __pa(_brk_end), 
"BRK"); + memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); /* Mark brk area as locked down and no longer taking any new allocations */ @@ -324,17 +324,16 @@ static void __init relocate_initrd(void) char *p, *q; /* We need to move the initrd down into lowmem */ - ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, + ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, PAGE_SIZE); - if (ramdisk_here == -1ULL) + if (ramdisk_here == MEMBLOCK_ERROR) panic("Cannot find place for new RAMDISK of size %lld\n", ramdisk_size); /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - reserve_early(ramdisk_here, ramdisk_here + area_size, - "NEW RAMDISK"); + memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", @@ -390,7 +389,7 @@ static void __init reserve_initrd(void) initrd_start = 0; if (ramdisk_size >= (end_of_lowmem>>1)) { - free_early(ramdisk_image, ramdisk_end); + memblock_x86_free_range(ramdisk_image, ramdisk_end); printk(KERN_ERR "initrd too large to handle, " "disabling initrd\n"); return; @@ -413,7 +412,7 @@ static void __init reserve_initrd(void) relocate_initrd(); - free_early(ramdisk_image, ramdisk_end); + memblock_x86_free_range(ramdisk_image, ramdisk_end); } #else static void __init reserve_initrd(void) @@ -469,7 +468,7 @@ static void __init e820_reserve_setup_data(void) e820_print_map("reserve setup_data"); } -static void __init reserve_early_setup_data(void) +static void __init memblock_x86_reserve_range_setup_data(void) { struct setup_data *data; u64 pa_data; @@ -481,7 +480,7 @@ static void __init reserve_early_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); sprintf(buf, "setup data %x", data->type); - reserve_early(pa_data, 
pa_data+sizeof(*data)+data->len, buf); + memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); pa_data = data->next; early_iounmap(data, sizeof(*data)); } @@ -519,23 +518,23 @@ static void __init reserve_crashkernel(void) if (crash_base <= 0) { const unsigned long long alignment = 16<<20; /* 16M */ - crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, + crash_base = memblock_find_in_range(alignment, ULONG_MAX, crash_size, alignment); - if (crash_base == -1ULL) { + if (crash_base == MEMBLOCK_ERROR) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } } else { unsigned long long start; - start = find_e820_area(crash_base, ULONG_MAX, crash_size, + start = memblock_find_in_range(crash_base, ULONG_MAX, crash_size, 1<<20); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); return; } } - reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); + memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " "for crashkernel (System RAM: %ldMB)\n", @@ -786,7 +785,7 @@ void __init setup_arch(char **cmdline_p) #endif 4)) { efi_enabled = 1; - efi_reserve_early(); + efi_memblock_x86_reserve_range(); } #endif @@ -846,7 +845,7 @@ void __init setup_arch(char **cmdline_p) vmi_activate(); /* after early param, so could get panic from serial */ - reserve_early_setup_data(); + memblock_x86_reserve_range_setup_data(); if (acpi_mps_check()) { #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index c652ef62742..7c2102c2aad 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -1,7 +1,7 @@ #include +#include #include -#include #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) #define __trampinit @@ -16,15 +16,15 @@ unsigned char *__trampinitdata trampoline_base; void __init reserve_trampoline_memory(void) { - unsigned 
long mem; + phys_addr_t mem; /* Has to be in very low memory so we can execute real-mode AP code. */ - mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); - if (mem == -1L) + mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); + if (mem == MEMBLOCK_ERROR) panic("Cannot allocate trampoline\n"); trampoline_base = __va(mem); - reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); + memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); } /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index b278535b14a..c0e28a13de7 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, int use_gbpages) { unsigned long puds, pmds, ptes, tables, start; + phys_addr_t base; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); @@ -75,12 +77,12 @@ static void __init find_early_table_space(unsigned long end, int use_pse, #else start = 0x8000; #endif - e820_table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; + e820_table_start = base >> PAGE_SHIFT; e820_table_end = e820_table_start; e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); @@ -299,7 +301,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); if (!after_bootmem && e820_table_end > e820_table_start) - reserve_early(e820_table_start << PAGE_SHIFT, + memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, e820_table_end << PAGE_SHIFT, "PGTABLE"); if (!after_bootmem) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 90e054589aa..63b09bae250 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -712,14 +713,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, highstart_pfn = highend_pfn = 
max_pfn; if (max_pfn > max_low_pfn) highstart_pfn = max_low_pfn; - e820_register_active_regions(0, 0, highend_pfn); + memblock_x86_register_active_regions(0, 0, highend_pfn); sparse_memory_present_with_active_regions(0); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - e820_register_active_regions(0, 0, max_low_pfn); + memblock_x86_register_active_regions(0, 0, max_low_pfn); sparse_memory_present_with_active_regions(0); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; @@ -776,16 +777,16 @@ void __init setup_bootmem_allocator(void) { #ifndef CONFIG_NO_BOOTMEM int nodeid; - unsigned long bootmap_size, bootmap; + phys_addr_t bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ bootmap_size = bootmem_bootmap_pages(max_low_pfn)< #include #include +#include #include #include #include @@ -577,18 +578,18 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, unsigned long bootmap_size, bootmap; bootmap_size = bootmem_bootmap_pages(end_pfn)<> PAGE_SHIFT, 0, end_pfn); - e820_register_active_regions(0, start_pfn, end_pfn); + memblock_x86_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); #else - e820_register_active_regions(0, start_pfn, end_pfn); + memblock_x86_register_active_regions(0, start_pfn, end_pfn); #endif } #endif diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 970ed579d4e..966de9372e8 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include @@ -222,7 +224,7 @@ int __init k8_scan_nodes(void) for_each_node_mask(i, node_possible_map) { int j; - e820_register_active_regions(i, + memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 
nodes[i].end >> PAGE_SHIFT); for (j = apicid_base; j < cores + apicid_base; j++) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 18d244f7020..92faf3a1c53 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -6,8 +6,7 @@ #include #include #include - -#include +#include static u64 patterns[] __initdata = { 0, @@ -35,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) (unsigned long long) pattern, (unsigned long long) start_bad, (unsigned long long) end_bad); - reserve_early(start_bad, end_bad, "BAD RAM"); + memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); } static void __init memtest(u64 pattern, u64 start_phys, u64 size) @@ -74,7 +73,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) u64 size = 0; while (start < end) { - start = find_e820_area_size(start, &size, 1); + start = memblock_x86_find_in_range_size(start, &size, 1); /* done ? */ if (start >= end) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 809baaaf48b..ddf9730b206 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -120,7 +121,7 @@ int __init get_memcfg_numa_flat(void) node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; - e820_register_active_regions(0, 0, max_pfn); + memblock_x86_register_active_regions(0, 0, max_pfn); memory_present(0, 0, max_pfn); node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); @@ -161,14 +162,14 @@ static void __init allocate_pgdat(int nid) NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; else { unsigned long pgdat_phys; - pgdat_phys = find_e820_area(min_low_pfn<>PAGE_SHIFT)); memset(buf, 0, sizeof(buf)); sprintf(buf, "NODE_DATA %d", nid); - reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); + memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); } printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", nid, (unsigned 
long)NODE_DATA(nid)); @@ -291,15 +292,15 @@ static __init unsigned long calculate_numa_remap_pages(void) PTRS_PER_PTE); node_kva_target <<= PAGE_SHIFT; do { - node_kva_final = find_e820_area(node_kva_target, + node_kva_final = memblock_find_in_range(node_kva_target, ((u64)node_end_pfn[nid])<>PAGE_SHIFT) > (node_start_pfn[nid])); - if (node_kva_final == -1ULL) + if (node_kva_final == MEMBLOCK_ERROR) panic("Can not get kva ram\n"); node_remap_size[nid] = size; @@ -318,9 +319,9 @@ static __init unsigned long calculate_numa_remap_pages(void) * but we could have some hole in high memory, and it will only * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide * to use it as free. - * So reserve_early here, hope we don't run out of that array + * So memblock_x86_reserve_range here, hope we don't run out of that array */ - reserve_early(node_kva_final, + memblock_x86_reserve_range(node_kva_final, node_kva_final+(((u64)size)<> PAGE_SHIFT; kva_target_pfn -= PTRS_PER_PTE; - } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); + } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); - if (kva_start_pfn == -1UL) + if (kva_start_pfn == MEMBLOCK_ERROR) panic("Can not get kva space\n"); printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", @@ -382,7 +383,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, printk(KERN_INFO "max_pfn = %lx\n", max_pfn); /* avoid clash with initrd */ - reserve_early(kva_start_pfn< physnodes[i].end) { end = physnodes[i].end; @@ -467,7 +467,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, * this one must extend to the boundary. */ if (end < dma32_end && dma32_end - end - - e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) + memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) end = dma32_end; /* @@ -476,7 +476,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, * physical node. 
*/ if (physnodes[i].end - end - - e820_hole_size(end, physnodes[i].end) < size) + memblock_x86_hole_size(end, physnodes[i].end) < size) end = physnodes[i].end; /* @@ -504,7 +504,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) { u64 end = start + size; - while (end - start - e820_hole_size(start, end) < size) { + while (end - start - memblock_x86_hole_size(start, end) < size) { end += FAKE_NODE_MIN_SIZE; if (end > max_addr) { end = max_addr; @@ -533,7 +533,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) * creates a uniform distribution of node sizes across the entire * machine (but not necessarily over physical nodes). */ - min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) / + min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / MAX_NUMNODES; min_size = max(min_size, FAKE_NODE_MIN_SIZE); if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) @@ -566,7 +566,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) * this one must extend to the boundary. */ if (end < dma32_end && dma32_end - end - - e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) + memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) end = dma32_end; /* @@ -575,7 +575,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) * physical node. 
*/ if (physnodes[i].end - end - - e820_hole_size(end, physnodes[i].end) < size) + memblock_x86_hole_size(end, physnodes[i].end) < size) end = physnodes[i].end; /* @@ -639,7 +639,7 @@ static int __init numa_emulation(unsigned long start_pfn, */ remove_all_active_ranges(); for_each_node_mask(i, node_possible_map) { - e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, + memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); setup_node_bootmem(i, nodes[i].start, nodes[i].end); } @@ -692,7 +692,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, node_set(0, node_possible_map); for (i = 0; i < nr_cpu_ids; i++) numa_set_node(i, 0); - e820_register_active_regions(0, start_pfn, last_pfn); + memblock_x86_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); } diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 9324f13492d..a17dffd136c 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -25,6 +25,7 @@ */ #include #include +#include #include #include #include @@ -264,7 +265,7 @@ int __init get_memcfg_from_srat(void) if (node_read_chunk(chunk->nid, chunk)) continue; - e820_register_active_regions(chunk->nid, chunk->start_pfn, + memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } /* for out of order entries in SRAT */ diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index f9897f7a9ef..7f44eb62a5e 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -98,15 +99,15 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) unsigned long phys; length = slit->header.length; - phys = find_e820_area(0, max_pfn_mapped< x2APIC mapping */ @@ -324,7 +325,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) pxmram = 0; } - e820ram = max_pfn - 
(e820_hole_size(0, max_pfn<>PAGE_SHIFT); + e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<>PAGE_SHIFT); /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { printk(KERN_ERR @@ -421,7 +422,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) } for_each_node_mask(i, nodes_parsed) - e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, + memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); /* for out of order entries in SRAT */ sort_node_map(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 914f04695ce..b511f198691 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -1735,7 +1736,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, __xen_write_cr3(true, __pa(pgd)); xen_mc_issue(PARAVIRT_LAZY_CPU); - reserve_early(__pa(xen_start_info->pt_base), + memblock_x86_reserve_range(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + xen_start_info->nr_pt_frames * PAGE_SIZE), "XEN PAGETABLES"); @@ -1773,7 +1774,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - reserve_early(__pa(xen_start_info->pt_base), + memblock_x86_reserve_range(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + xen_start_info->nr_pt_frames * PAGE_SIZE), "XEN PAGETABLES"); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ad0047f47cd..2ac8f29f89c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,7 @@ char * __init xen_memory_setup(void) * - xen_start_info * See comment above "struct start_info" in */ - reserve_early(__pa(xen_start_info->mfn_list), + memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), __pa(xen_start_info->pt_base), "XEN START INFO"); diff 
--git a/mm/bootmem.c b/mm/bootmem.c index fda01a2c31a..13b0caa9793 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -436,7 +436,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, { #ifdef CONFIG_NO_BOOTMEM kmemleak_free_part(__va(physaddr), size); - free_early(physaddr, physaddr + size); + memblock_x86_free_range(physaddr, physaddr + size); #else unsigned long start, end; @@ -462,7 +462,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) { #ifdef CONFIG_NO_BOOTMEM kmemleak_free_part(__va(addr), size); - free_early(addr, addr + size); + memblock_x86_free_range(addr, addr + size); #else unsigned long start, end; -- cgit v1.2.3-70-g09d2 From a587d2daebcd2bc159d4348b6a7b028950a6d803 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove not used early_res code and some functions in e820.c that are not used anymore Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/e820.h | 14 -- arch/x86/kernel/e820.c | 52 ---- include/linux/early_res.h | 23 -- kernel/Makefile | 1 - kernel/early_res.c | 590 -------------------------------------------- 5 files changed, 680 deletions(-) delete mode 100644 include/linux/early_res.h delete mode 100644 kernel/early_res.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 388fed29146..718646384e0 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -112,31 +112,17 @@ static inline void early_memtest(unsigned long start, unsigned long end) } #endif -extern unsigned long end_user_pfn; - -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); - extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern void e820_register_active_regions(int nid, unsigned long 
start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); - extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void memblock_x86_fill(void); - extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); -void reserve_early(u64 start, u64 end, char *name); -void free_early(u64 start, u64 end); - /* * Returns true iff the specified range [s,e) is completely contained inside * the ISA region. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a9221d18a5e..d5fd89462d7 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -738,32 +738,6 @@ static int __init e820_mark_nvs_memory(void) core_initcall(e820_mark_nvs_memory); #endif -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - u64 mem = memblock_find_in_range(start, end, size, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL; - - return mem; -} - -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - u64 mem = memblock_x86_find_in_range_size(start, sizep, align); - - if (mem == MEMBLOCK_ERROR) - return -1ULL - - return mem; -} - /* * pre allocated 4k and reserved it in memblock and e820_saved */ @@ -856,32 +830,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - memblock_x86_register_active_regions(nid, start_pfn, last_pfn); -} - -/* - * Find the hole size (in bytes) in the memory range. 
- * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - return memblock_x86_hole_size(start, end); -} - -void reserve_early(u64 start, u64 end, char *name) -{ - memblock_x86_reserve_range(start, end, name); -} -void free_early(u64 start, u64 end) -{ - memblock_x86_free_range(start, end); -} - static void early_panic(char *msg) { early_printk(msg); diff --git a/include/linux/early_res.h b/include/linux/early_res.h deleted file mode 100644 index 29c09f57a13..00000000000 --- a/include/linux/early_res.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _LINUX_EARLY_RES_H -#define _LINUX_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -void free_early_partial(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -u64 get_max_mapped(void); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_EARLY_RES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 057472fbc27..80e61c3e44a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o -obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/early_res.c b/kernel/early_res.c deleted file mode 100644 index 
7bfae887f21..00000000000 --- a/kernel/early_res.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? 
*/ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. - */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. - */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. 
Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. 
We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? */ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = get_max_mapped(); - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. 
- * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. - */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - - kmemleak_free_part(__va(start), end - start); - - if (start == end) - return; - - if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end)) - return; - -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? 
*/ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; - int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; - end = get_max_mapped(); -#ifdef MAX_DMA32_PFN - if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = 
clean_sort_range(range, count); - - /* need to clear it ? */ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res 
*r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. - * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} -- cgit v1.2.3-70-g09d2 From 6f2a75369e7561e800d86927ecd83c970996b21f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86, memblock: Use memblock_memory_size()/memblock_free_memory_size() to get correct dma_reserve memblock_memory_size() will return memory size in memblock.memory.region. memblock_free_memory_size() will return free memory size in memblock.memory.region. So We can get exact reseved size in specified range. 
Set the size right after initmem_init(), because later bootmem API will get area above 16M. (except some fallback). Later after we remove the bootmem, We could call that just before paging_init(). Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/e820.h | 2 ++ arch/x86/kernel/e820.c | 16 ++++++++++++++++ arch/x86/kernel/setup.c | 1 + arch/x86/mm/init_64.c | 7 ------- 4 files changed, 19 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 718646384e0..5be1542fbfa 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -117,6 +117,8 @@ extern unsigned long e820_end_of_low_ram_pfn(void); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); void memblock_x86_fill(void); +void memblock_find_dma_reserve(void); + extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d5fd89462d7..0c2b7ef7a34 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1105,3 +1105,19 @@ void __init memblock_x86_fill(void) memblock_analyze(); memblock_dump_all(); } + +void __init memblock_find_dma_reserve(void) +{ +#ifdef CONFIG_X86_64 + u64 free_size_pfn; + u64 mem_size_pfn; + /* + * need to find out used area below MAX_DMA_PFN + * need to use memblock to get free size in [0, MAX_DMA_PFN] + * at first, and assume boot_mem will not take below MAX_DMA_PFN + */ + mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; + free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; + set_dma_reserve(mem_size_pfn - free_size_pfn); +#endif +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a4f01733e87..924c8f78e98 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1013,6 +1013,7 @@ void __init 
setup_arch(char **cmdline_p) #endif initmem_init(0, max_pfn, acpi, k8); + memblock_find_dma_reserve(); #ifndef CONFIG_NO_BOOTMEM memblock_x86_to_bootmem(0, max_low_pfn< #include -static unsigned long dma_reserve __initdata; - static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -821,11 +819,6 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, reserve_bootmem(phys, len, flags); - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { - dma_reserve += len / PAGE_SIZE; - set_dma_reserve(dma_reserve); - } - return 0; } #endif -- cgit v1.2.3-70-g09d2 From 774ea0bcb27f57b6fd521b3b6c43237782fed4b9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Aug 2010 13:39:18 -0700 Subject: x86: Remove old bootmem code Requested by Ingo, Thomas and HPA. The old bootmem code is no longer necessary, and the transition is complete. Remove it. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 10 +-------- arch/x86/kernel/setup.c | 4 ---- arch/x86/mm/init_32.c | 56 ------------------------------------------------- arch/x86/mm/init_64.c | 41 ------------------------------------ arch/x86/mm/memblock.c | 29 ------------------------- arch/x86/mm/numa_32.c | 3 --- arch/x86/mm/numa_64.c | 47 ----------------------------------------- 7 files changed, 1 insertion(+), 189 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 542bb2610cb..ce07615f1cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -585,15 +585,7 @@ config PARAVIRT_DEBUG a paravirt_op is missing when it is called. config NO_BOOTMEM - default y - bool "Disable Bootmem code" - ---help--- - Use memblock directly instead of bootmem before slab is ready. 
- - allocator (buddy) [generic] - - early allocator (bootmem) [generic] - - very early allocator (memblock) [some generic] - - very very early allocator (early brk model) [x86] - So reduce one layer between early allocator to final allocator + def_bool y config MEMTEST bool "Memtest" diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 924c8f78e98..1d114ff6a07 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1014,10 +1014,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn, acpi, k8); memblock_find_dma_reserve(); -#ifndef CONFIG_NO_BOOTMEM - memblock_x86_to_bootmem(0, max_low_pfn<> PAGE_SHIFT, - start_pfn, end_pfn); - printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", - nodeid, start_pfn< max_low_pfn) - continue; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; -#else - start_pfn = 0; - end_pfn = max_low_pfn; -#endif - bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, - bootmap); - } -#endif - after_bootmem = 1; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d6d408467c4..690b8d13971 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -572,23 +572,7 @@ kernel_physical_mapping_init(unsigned long start, void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, int acpi, int k8) { -#ifndef CONFIG_NO_BOOTMEM - unsigned long bootmap_size, bootmap; - - bootmap_size = bootmem_bootmap_pages(end_pfn)<> PAGE_SHIFT, - 0, end_pfn); memblock_x86_register_active_regions(0, start_pfn, end_pfn); - free_bootmem_with_active_regions(0, end_pfn); -#else - memblock_x86_register_active_regions(0, start_pfn, end_pfn); -#endif } #endif @@ -798,31 +782,6 @@ void mark_rodata_ro(void) #endif -#ifndef CONFIG_NO_BOOTMEM -int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, - int flags) -{ - unsigned long pfn = phys >> PAGE_SHIFT; - - if (pfn >= max_pfn) { - /* - * This can happen with kdump kernels when accessing - * firmware tables: - */ - if (pfn < 
max_pfn_mapped) - return -EFAULT; - - printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n", - phys, len); - return -EFAULT; - } - - reserve_bootmem(phys, len, flags); - - return 0; -} -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index aaff3932588..50ecbc59757 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -109,7 +109,6 @@ static __init struct range *find_range_array(int count) return range; } -#ifdef CONFIG_NO_BOOTMEM static void __init memblock_x86_subtract_reserved(struct range *range, int az) { u64 final_start, final_end; @@ -182,34 +181,6 @@ int __init get_free_all_memory_range(struct range **rangep, int nodeid) *rangep = range; return nr_range; } -#else -void __init memblock_x86_to_bootmem(u64 start, u64 end) -{ - int count; - u64 final_start, final_end; - struct memblock_region *r; - - /* Take out region array itself */ - memblock_free_reserved_regions(); - - count = memblock.reserved.cnt; - memblock_dbg("(%d early reservations) ==> bootmem [%#010llx-%#010llx]\n", count, start, end - 1); - for_each_memblock(reserved, r) { - memblock_dbg(" [%#010llx-%#010llx] ", (u64)r->base, (u64)r->base + r->size - 1); - final_start = max(start, r->base); - final_end = min(end, r->base + r->size); - if (final_start >= final_end) { - memblock_dbg("\n"); - continue; - } - memblock_dbg(" ==> [%#010llx-%#010llx]\n", final_start, final_end - 1); - reserve_bootmem_generic(final_start, final_end - final_start, BOOTMEM_DEFAULT); - } - - /* Put region array back ? 
*/ - memblock_reserve_reserved_regions(); -} -#endif static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) { diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index ddf9730b206..70ddeb75ba2 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -420,9 +420,6 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, for_each_online_node(nid) { memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); NODE_DATA(nid)->node_id = nid; -#ifndef CONFIG_NO_BOOTMEM - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; -#endif } setup_bootmem_allocator(); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 984b1ff7db4..aef0ff74f7d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -199,10 +199,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) unsigned long start_pfn, last_pfn, nodedata_phys; const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); int nid; -#ifndef CONFIG_NO_BOOTMEM - unsigned long bootmap_start, bootmap_pages, bootmap_size; - void *bootmap; -#endif if (!end) return; @@ -239,47 +235,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; -#ifndef CONFIG_NO_BOOTMEM - NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; - - /* - * Find a place for the bootmem map - * nodedata_phys could be on other nodes by alloc_bootmem, - * so need to sure bootmap_start not to be small, otherwise - * early_node_mem will get that with memblock_find_in_range instead - * of alloc_bootmem, that could clash with reserved range - */ - bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); - bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); - /* - * SMP_CACHE_BYTES could be enough, but init_bootmem_node like - * to use that to align to PAGE_SIZE - */ - bootmap = early_node_mem(nodeid, bootmap_start, end, - 
bootmap_pages<> PAGE_SHIFT, - start_pfn, last_pfn); - - printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", - bootmap_start, bootmap_start + bootmap_size - 1, - bootmap_pages); - nid = phys_to_nid(bootmap_start); - if (nid != nodeid) - printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); - - free_bootmem_with_active_regions(nodeid, end); -#endif - node_set_online(nodeid); } @@ -704,9 +659,7 @@ unsigned long __init numa_free_all_bootmem(void) for_each_online_node(i) pages += free_all_bootmem_node(NODE_DATA(i)); -#ifdef CONFIG_NO_BOOTMEM pages += free_all_memory_core_early(MAX_NUMNODES); -#endif return pages; } -- cgit v1.2.3-70-g09d2 From 9f4c13964b58608fbce05540743281ea3146c0e8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 5 Oct 2010 16:05:14 -0700 Subject: x86, memblock: Fix crashkernel allocation Cai Qian found crashkernel is broken with the x86 memblock changes. 1. crashkernel=128M@32M always reported that range is used, even if the first kernel is small and does not use that range 2. we always got following report when using "kexec -p" Could not find a free area of memory of a000 bytes... locate_hole failed The root cause is that generic memblock_find_in_range() will try to allocate from the top of the range, whereas the kexec code was written assuming that allocation was always near the bottom and that it could blindly extend memory upward. Unfortunately the kexec code doesn't have a system for requesting the range that it really needs, so this is subject to probabilistic failures. This patch hacks around the problem by limiting the target range heuristically to below the traditional bzImage max range. This number is arbitrary and not always correct, and a much better result would be obtained by having kexec communicate this number based on the kernel header information and any appropriate command line options.
Reported-and-Bisected-by: CAI Qian Signed-off-by: Yinghai Lu LKML-Reference: <4CABAF2A.5090501@kernel.org> Cc: Vivek Goyal Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bf89e0a59b8..b11a238b2e3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -502,6 +502,7 @@ static inline unsigned long long get_total_mem(void) return total << PAGE_SHIFT; } +#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF static void __init reserve_crashkernel(void) { unsigned long long total_mem; @@ -519,8 +520,12 @@ static void __init reserve_crashkernel(void) if (crash_base <= 0) { const unsigned long long alignment = 16<<20; /* 16M */ - crash_base = memblock_find_in_range(alignment, ULONG_MAX, crash_size, - alignment); + /* + * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX + */ + crash_base = memblock_find_in_range(alignment, + DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); + if (crash_base == MEMBLOCK_ERROR) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; @@ -528,8 +533,8 @@ static void __init reserve_crashkernel(void) } else { unsigned long long start; - start = memblock_find_in_range(crash_base, ULONG_MAX, crash_size, - 1<<20); + start = memblock_find_in_range(crash_base, + crash_base + crash_size, crash_size, 1<<20); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); return; -- cgit v1.2.3-70-g09d2 From 1d931264af0f10649b35afa8fbd2e169da51ac08 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 5 Oct 2010 16:15:15 -0700 Subject: x86-32, memblock: Make add_highpages honor early reserved ranges Originally the only early reserved range that is overlapped with high pages is "KVA RAM", but we already do remove that from the active ranges. 
However, it turns out Xen could have that kind of overlapping to support memory ballooning. So we need to make add_highpages_with_active_regions() subtract memblock reserved just like low ram; this is the proper design anyway. In this patch, refactor get_free_all_memory_range() so that it can be used by add_highpages_with_active_regions(). Also we don't need to remove "KVA RAM" from active ranges. Signed-off-by: Yinghai Lu LKML-Reference: <4CABB183.1040607@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/memblock.h | 2 ++ arch/x86/mm/init_32.c | 53 +++++++++++++---------------------------- arch/x86/mm/memblock.c | 19 +++++++++++---- arch/x86/mm/numa_32.c | 2 -- 4 files changed, 33 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h index 2c304bb6e07..19ae14ba697 100644 --- a/arch/x86/include/asm/memblock.h +++ b/arch/x86/include/asm/memblock.h @@ -9,6 +9,8 @@ void memblock_x86_to_bootmem(u64 start, u64 end); void memblock_x86_reserve_range(u64 start, u64 end, char *name); void memblock_x86_free_range(u64 start, u64 end); struct range; +int __get_free_all_memory_range(struct range **range, int nodeid, + unsigned long start_pfn, unsigned long end_pfn); int get_free_all_memory_range(struct range **rangep, int nodeid); void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c2385d7ae31..85467099d6d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -423,49 +423,28 @@ static void __init add_one_highpage_init(struct page *page) totalhigh_pages++; } -struct add_highpages_data { - unsigned long start_pfn; - unsigned long end_pfn; -}; - -static int __init add_highpages_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) +void __init add_highpages_with_active_regions(int nid, + unsigned long start_pfn, unsigned long end_pfn) { - int node_pfn; - struct
page *page; - unsigned long final_start_pfn, final_end_pfn; - struct add_highpages_data *data; + struct range *range; + int nr_range; + int i; - data = (struct add_highpages_data *)datax; + nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); - final_start_pfn = max(start_pfn, data->start_pfn); - final_end_pfn = min(end_pfn, data->end_pfn); - if (final_start_pfn >= final_end_pfn) - return 0; + for (i = 0; i < nr_range; i++) { + struct page *page; + int node_pfn; - for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; - node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page); + for (node_pfn = range[i].start; node_pfn < range[i].end; + node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page); + } } - - return 0; - } - -void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - struct add_highpages_data data; - - data.start_pfn = start_pfn; - data.end_pfn = end_pfn; - - work_with_active_regions(nid, add_highpages_work_fn, &data); -} - #else static inline void permanent_kmaps_init(pgd_t *pgd_base) { diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index 50ecbc59757..fd7a0404945 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -156,7 +156,8 @@ static int __init count_early_node_map(int nodeid) return data.nr; } -int __init get_free_all_memory_range(struct range **rangep, int nodeid) +int __init __get_free_all_memory_range(struct range **rangep, int nodeid, + unsigned long start_pfn, unsigned long end_pfn) { int count; struct range *range; @@ -172,9 +173,9 @@ int __init get_free_all_memory_range(struct range **rangep, int nodeid) * at first */ nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif + subtract_range(range, count, 0, start_pfn); + 
subtract_range(range, count, end_pfn, -1ULL); + memblock_x86_subtract_reserved(range, count); nr_range = clean_sort_range(range, count); @@ -182,6 +183,16 @@ int __init get_free_all_memory_range(struct range **rangep, int nodeid) return nr_range; } +int __init get_free_all_memory_range(struct range **rangep, int nodeid) +{ + unsigned long end_pfn = -1UL; + +#ifdef CONFIG_X86_32 + end_pfn = max_low_pfn; +#endif + return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn); +} + static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) { int i, count; diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 70ddeb75ba2..84a3e4c9f27 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -326,8 +326,6 @@ static __init unsigned long calculate_numa_remap_pages(void) "KVA RAM"); node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; - remove_active_range(nid, node_remap_start_pfn[nid], - node_remap_start_pfn[nid] + size); } printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", reserve_pages); -- cgit v1.2.3-70-g09d2 From 16c36f743bf8481d0ba40a6de0af11736095d7cf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 4 Oct 2010 14:58:04 -0700 Subject: x86, memblock: Remove __memblock_x86_find_in_range_size() Fold it into memblock_x86_find_in_range(), and change bad_addr_size() to check_reserve_memblock(). So whole memblock_x86_find_in_range_size() code is more readable. Signed-off-by: Yinghai Lu LKML-Reference: <4CAA4DEC.4000401@kernel.org> Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/memblock.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c index fd7a0404945..aa1169392b8 100644 --- a/arch/x86/mm/memblock.c +++ b/arch/x86/mm/memblock.c @@ -8,7 +8,7 @@ #include /* Check for already reserved areas */ -static inline bool __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align) { struct memblock_region *r; u64 addr = *addrp, last; @@ -30,7 +30,7 @@ again: goto again; } if (last <= (r->base + r->size) && addr >= r->base) { - (*sizep)++; + *sizep = 0; return false; } } @@ -41,29 +41,6 @@ again: return changed; } -static u64 __init __memblock_x86_find_in_range_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return MEMBLOCK_ERROR; -} - /* * Find next free range after start, and size is returned in *sizep */ @@ -76,10 +53,16 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) u64 ei_last = ei_start + r->size; u64 addr; - addr = __memblock_x86_find_in_range_size(ei_start, ei_last, start, - sizep, align); + addr = round_up(ei_start, align); + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + continue; + *sizep = ei_last - addr; + while (check_with_memblock_reserved_size(&addr, sizep, align)) + ; - if (addr != MEMBLOCK_ERROR) + if (*sizep) return addr; } -- cgit v1.2.3-70-g09d2 From 236260b90dd94516982ad67aa6f5449c4c37db7b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 6 Oct 2010 15:52:29 -0700 
Subject: memblock: Allow memblock_init to be called early The Xen setup code needs to call memblock_x86_reserve_range() very early, so allow it to initialize the memblock subsystem before doing so. The second memblock_init() is ignored. Signed-off-by: Jeremy Fitzhardinge Cc: Yinghai Lu Cc: Benjamin Herrenschmidt LKML-Reference: <4CACFDAD.3090900@goop.org> Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 3 +++ mm/memblock.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7d46c844141..63b83ceebd1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1183,6 +1184,8 @@ asmlinkage void __init xen_start_kernel(void) local_irq_disable(); early_boot_irqs_off(); + memblock_init(); + xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); diff --git a/mm/memblock.c b/mm/memblock.c index 9ad39690a2b..ae8b06c828c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -752,6 +752,12 @@ void __init memblock_analyze(void) void __init memblock_init(void) { + static int init_done __initdata = 0; + + if (init_done) + return; + init_done = 1; + /* Hookup the initial arrays */ memblock.memory.regions = memblock_memory_init_regions; memblock.memory.max = INIT_MEMBLOCK_REGIONS; -- cgit v1.2.3-70-g09d2 From fef5ba797991f9335bcfc295942b684f9bf613a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 Oct 2010 16:02:24 -0700 Subject: xen: Cope with unmapped pages when initializing kernel pagetable Xen requires that all pages containing pagetable entries to be mapped read-only. If pages used for the initial pagetable are already mapped then we can change the mapping to RO. However, if they are initially unmapped, we need to make sure that when they are later mapped, they are also mapped RO. 
We do this by knowing that the kernel pagetable memory is pre-allocated in the range e820_table_start - e820_table_end, so any pfn within this range should be mapped read-only. However, the pagetable setup code early_ioremaps the pages to write their entries, so we must make sure that mappings created in the early_ioremap fixmap area are mapped RW. (Those mappings are removed before the pages are presented to Xen as pagetable pages.) Signed-off-by: Jeremy Fitzhardinge LKML-Reference: <4CB63A80.8060702@goop.org> Cc: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io.h | 1 + arch/x86/mm/ioremap.c | 5 +++++ arch/x86/xen/mmu.c | 26 ++++++++++++++++++-------- 3 files changed, 24 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 30a3e977612..66aee6c4123 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -348,6 +348,7 @@ extern void __iomem *early_memremap(resource_size_t phys_addr, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void fixup_early_ioremap(void); +extern bool is_early_ioremap_ptep(pte_t *ptep); #define IO_SPACE_LIMIT 0xffff diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 3ba6e0608c5..0369843511d 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -362,6 +362,11 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +bool __init is_early_ioremap_ptep(pte_t *ptep) +{ + return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; +} + static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; void __init early_ioremap_init(void) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4fe04ac0bae..7d55e9ee3a7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -360,7 +361,8 @@ void make_lowmem_page_readonly(void *vaddr) 
unsigned int level; pte = lookup_address(address, &level); - BUG_ON(pte == NULL); + if (pte == NULL) + return; /* vaddr missing */ ptev = pte_wrprotect(*pte); @@ -375,7 +377,8 @@ void make_lowmem_page_readwrite(void *vaddr) unsigned int level; pte = lookup_address(address, &level); - BUG_ON(pte == NULL); + if (pte == NULL) + return; /* vaddr missing */ ptev = pte_mkwrite(*pte); @@ -1509,13 +1512,25 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) { + unsigned long pfn = pte_pfn(pte); + +#ifdef CONFIG_X86_32 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ if (pte_val_ma(*ptep) & _PAGE_PRESENT) pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & pte_val_ma(pte)); +#endif + + /* + * If the new pfn is within the range of the newly allocated + * kernel pagetable, and it isn't being mapped into an + * early_ioremap fixmap slot, make sure it is RO. + */ + if (!is_early_ioremap_ptep(ptep) && + pfn >= e820_table_start && pfn < e820_table_end) + pte = pte_wrprotect(pte); return pte; } @@ -1528,7 +1543,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) xen_set_pte(ptep, pte); } -#endif static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { @@ -1973,11 +1987,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pmd_clone = paravirt_nop, .release_pmd = xen_release_pmd_init, -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else .set_pte = xen_set_pte_init, -#endif .set_pte_at = xen_set_pte_at, .set_pmd = xen_set_pmd_hyper, -- cgit v1.2.3-70-g09d2 From 67e87f0a1c5cbc750f81ebf6a128e8ff6f4376cc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 Oct 2010 16:34:15 -0700 Subject: x86-64: Only set max_pfn_mapped to 512 MiB if we enter via head_64.S head_64.S maps up to 512 MiB, but that is not necessarily true for other entry paths, such as Xen.
Thus, co-locate the setting of max_pfn_mapped with the code to actually set up the page tables in head_64.S. The 32-bit code is already so co-located. (The Xen code already sets max_pfn_mapped correctly for its own use case.) -v2: Yinghai fixed the following bug in this patch: | | max_pfn_mapped is in .bss section, so we need to set that | after bss get cleared. Without that we crash on bootup. | | That is safe because Xen does not call x86_64_start_kernel(). | Signed-off-by: Jeremy Fitzhardinge Fixed-by: Yinghai Lu Signed-off-by: H. Peter Anvin LKML-Reference: <4CB6AB24.9020504@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/setup.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 97adf9828b9..2d2673c28af 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -80,6 +80,8 @@ void __init x86_64_start_kernel(char * real_mode_data) /* Cleanup the over mapped high alias */ cleanup_highmap(); + max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { #ifdef CONFIG_EARLY_PRINTK set_intr_gate(i, &early_idt_handlers[i]); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b11a238b2e3..c3cebfe7bfc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -932,7 +932,6 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = max_pfn; high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; - max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif /* -- cgit v1.2.3-70-g09d2