diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 19:05:45 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-21 19:05:45 -0800 |
commit | df32e43a54d04eda35d2859beaf90e3864d53288 (patch) | |
tree | 7a61cf658b2949bd426285eb9902be7758ced1ba /arch | |
parent | fbd918a2026d0464ce9c23f57b7de4bcfccdc2e6 (diff) | |
parent | 78d5506e82b21a1a1de68c24182db2c2fe521422 (diff) |
Merge branch 'akpm' (incoming from Andrew)
Merge first patch-bomb from Andrew Morton:
- a couple of misc things
- inotify/fsnotify work from Jan
- ocfs2 updates (partial)
- about half of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
mm/migrate: remove unused function, fail_migrate_page()
mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages
mm/migrate: correct failure handling if !hugepage_migration_support()
mm/migrate: add comment about permanent failure path
mm, page_alloc: warn for non-blockable __GFP_NOFAIL allocation failure
mm: compaction: reset scanner positions immediately when they meet
mm: compaction: do not mark unmovable pageblocks as skipped in async compaction
mm: compaction: detect when scanners meet in isolate_freepages
mm: compaction: reset cached scanner pfn's before reading them
mm: compaction: encapsulate defer reset logic
mm: compaction: trace compaction begin and end
memcg, oom: lock mem_cgroup_print_oom_info
sched: add tracepoints related to NUMA task migration
mm: numa: do not automatically migrate KSM pages
mm: numa: trace tasks that fail migration due to rate limiting
mm: numa: limit scope of lock for NUMA migrate rate limiting
mm: numa: make NUMA-migrate related functions static
lib/show_mem.c: show num_poisoned_pages when oom
mm/hwpoison: add '#' to hwpoison_inject
mm/memblock: use WARN_ONCE when MAX_NUMNODES passed as input parameter
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/include/asm/dma.h | 4 |
-rw-r--r-- | arch/arm/kernel/devtree.c | 2 |
-rw-r--r-- | arch/arm/kernel/setup.c | 2 |
-rw-r--r-- | arch/arm/mach-omap2/omap_hwmod.c | 8 |
-rw-r--r-- | arch/arm/mm/init.c | 5 |
-rw-r--r-- | arch/ia64/mm/contig.c | 68 |
-rw-r--r-- | arch/ia64/mm/discontig.c | 63 |
-rw-r--r-- | arch/ia64/mm/init.c | 48 |
-rw-r--r-- | arch/metag/mm/init.c | 3 |
-rw-r--r-- | arch/metag/mm/numa.c | 3 |
-rw-r--r-- | arch/microblaze/mm/init.c | 3 |
-rw-r--r-- | arch/parisc/mm/init.c | 59 |
-rw-r--r-- | arch/powerpc/mm/mem.c | 2 |
-rw-r--r-- | arch/powerpc/mm/numa.c | 8 |
-rw-r--r-- | arch/score/Kconfig | 1 |
-rw-r--r-- | arch/sh/kernel/kgdb.c | 1 |
-rw-r--r-- | arch/sh/kernel/setup.c | 4 |
-rw-r--r-- | arch/sparc/mm/init_64.c | 5 |
-rw-r--r-- | arch/unicore32/mm/init.c | 3 |
-rw-r--r-- | arch/x86/include/asm/page_types.h | 4 |
-rw-r--r-- | arch/x86/kernel/check.c | 2 |
-rw-r--r-- | arch/x86/kernel/e820.c | 2 |
-rw-r--r-- | arch/x86/kernel/setup.c | 2 |
-rw-r--r-- | arch/x86/mm/init_32.c | 2 |
-rw-r--r-- | arch/x86/mm/init_64.c | 2 |
-rw-r--r-- | arch/x86/mm/memtest.c | 2 |
-rw-r--r-- | arch/x86/mm/numa.c | 52 |
-rw-r--r-- | arch/x86/mm/srat.c | 5 |
28 files changed, 153 insertions, 212 deletions
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 58b8c6a0ab1..99084431d6a 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -8,8 +8,8 @@ #define MAX_DMA_ADDRESS 0xffffffffUL #else #define MAX_DMA_ADDRESS ({ \ - extern unsigned long arm_dma_zone_size; \ - arm_dma_zone_size ? \ + extern phys_addr_t arm_dma_zone_size; \ + arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \ (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; }) #endif diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 34d5fd585bb..f751714d52c 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -33,7 +33,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return alloc_bootmem_align(size, align); + return memblock_virt_alloc(size, align); } void __init arm_dt_memblock_reserve(void) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 987a7f5bce5..8ce1cbd08db 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -717,7 +717,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) kernel_data.end = virt_to_phys(_end - 1); for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = memblock_virt_alloc(sizeof(*res), 0); res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 8a1b5e0bad4..f7a6fd35b1e 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2791,9 +2791,7 @@ static int __init _alloc_links(struct omap_hwmod_link **ml, sz = sizeof(struct omap_hwmod_link) * LINKS_PER_OCP_IF; *sl = NULL; - *ml = alloc_bootmem(sz); - - memset(*ml, 0, sz); + *ml = memblock_virt_alloc(sz, 0); *sl = (void *)(*ml) + 
sizeof(struct omap_hwmod_link); @@ -2912,9 +2910,7 @@ static int __init _alloc_linkspace(struct omap_hwmod_ocp_if **ois) pr_debug("omap_hwmod: %s: allocating %d byte linkspace (%d links)\n", __func__, sz, max_ls); - linkspace = alloc_bootmem(sz); - - memset(linkspace, 0, sz); + linkspace = memblock_virt_alloc(sz, 0); return 0; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 3e8f106ee5f..11eb8add782 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -92,9 +92,6 @@ void show_mem(unsigned int filter) printk("Mem-info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_bank (i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; @@ -461,7 +458,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * free the section of the memmap array. */ if (pg < pgend) - free_bootmem(pg, pgend - pg); + memblock_free_early(pg, pgend - pg); } /* diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index da5237d636d..52715a71aed 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -31,74 +31,6 @@ static unsigned long max_gap; #endif -/** - * show_mem - give short summary of memory stats - * - * Shows a simple page count of reserved and used pages in the system. - * For discontig machines, it does this on a per-pgdat basis. 
- */ -void show_mem(unsigned int filter) -{ - int i, total_reserved = 0; - int total_shared = 0, total_cached = 0; - unsigned long total_present = 0; - pg_data_t *pgdat; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(filter); - printk(KERN_INFO "Node memory in pages:\n"); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_online_pgdat(pgdat) { - unsigned long present; - unsigned long flags; - int shared = 0, cached = 0, reserved = 0; - int nid = pgdat->node_id; - - if (skip_free_areas_node(filter, nid)) - continue; - pgdat_resize_lock(pgdat, &flags); - present = pgdat->node_present_pages; - for(i = 0; i < pgdat->node_spanned_pages; i++) { - struct page *page; - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) - touch_nmi_watchdog(); - if (pfn_valid(pgdat->node_start_pfn + i)) - page = pfn_to_page(pgdat->node_start_pfn + i); - else { -#ifdef CONFIG_VIRTUAL_MEM_MAP - if (max_gap < LARGE_GAP) - continue; -#endif - i = vmemmap_find_next_valid_pfn(nid, i) - 1; - continue; - } - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page)-1; - } - pgdat_resize_unlock(pgdat, &flags); - total_present += present; - total_reserved += reserved; - total_cached += cached; - total_shared += shared; - printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", nid, - present, reserved, shared, cached); - } - printk(KERN_INFO "%ld pages of RAM\n", total_present); - printk(KERN_INFO "%d reserved pages\n", total_reserved); - printk(KERN_INFO "%d pages shared\n", total_shared); - printk(KERN_INFO "%d pages swap cached\n", total_cached); - printk(KERN_INFO "Total of %ld pages in page table cache\n", - quicklist_total_size()); - printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages()); -} - - /* physical address where the bootmem map is located */ unsigned long bootmap_start; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 
2de08f4d993..87862680536 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -608,69 +608,6 @@ void *per_cpu_init(void) #endif /* CONFIG_SMP */ /** - * show_mem - give short summary of memory stats - * - * Shows a simple page count of reserved and used pages in the system. - * For discontig machines, it does this on a per-pgdat basis. - */ -void show_mem(unsigned int filter) -{ - int i, total_reserved = 0; - int total_shared = 0, total_cached = 0; - unsigned long total_present = 0; - pg_data_t *pgdat; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - printk(KERN_INFO "Node memory in pages:\n"); - for_each_online_pgdat(pgdat) { - unsigned long present; - unsigned long flags; - int shared = 0, cached = 0, reserved = 0; - int nid = pgdat->node_id; - - if (skip_free_areas_node(filter, nid)) - continue; - pgdat_resize_lock(pgdat, &flags); - present = pgdat->node_present_pages; - for(i = 0; i < pgdat->node_spanned_pages; i++) { - struct page *page; - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) - touch_nmi_watchdog(); - if (pfn_valid(pgdat->node_start_pfn + i)) - page = pfn_to_page(pgdat->node_start_pfn + i); - else { - i = vmemmap_find_next_valid_pfn(nid, i) - 1; - continue; - } - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page)-1; - } - pgdat_resize_unlock(pgdat, &flags); - total_present += present; - total_reserved += reserved; - total_cached += cached; - total_shared += shared; - printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, " - "shrd: %10d, swpd: %10d\n", nid, - present, reserved, shared, cached); - } - printk(KERN_INFO "%ld pages of RAM\n", total_present); - printk(KERN_INFO "%d reserved pages\n", total_reserved); - printk(KERN_INFO "%d pages shared\n", total_shared); - printk(KERN_INFO "%d pages swap cached\n", total_cached); - printk(KERN_INFO "Total of %ld pages in page table 
cache\n", - quicklist_total_size()); - printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages()); -} - -/** * call_pernode_memory - use SRAT to call callback functions with node info * @start: physical start of range * @len: length of range diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 88504abf570..25c350264a4 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -684,3 +684,51 @@ per_linux32_init(void) } __initcall(per_linux32_init); + +/** + * show_mem - give short summary of memory stats + * + * Shows a simple page count of reserved and used pages in the system. + * For discontig machines, it does this on a per-pgdat basis. + */ +void show_mem(unsigned int filter) +{ + int total_reserved = 0; + unsigned long total_present = 0; + pg_data_t *pgdat; + + printk(KERN_INFO "Mem-info:\n"); + show_free_areas(filter); + printk(KERN_INFO "Node memory in pages:\n"); + for_each_online_pgdat(pgdat) { + unsigned long present; + unsigned long flags; + int reserved = 0; + int nid = pgdat->node_id; + int zoneid; + + if (skip_free_areas_node(filter, nid)) + continue; + pgdat_resize_lock(pgdat, &flags); + + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + struct zone *zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + reserved += zone->present_pages - zone->managed_pages; + } + present = pgdat->node_present_pages; + + pgdat_resize_unlock(pgdat, &flags); + total_present += present; + total_reserved += reserved; + printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, ", + nid, present, reserved); + } + printk(KERN_INFO "%ld pages of RAM\n", total_present); + printk(KERN_INFO "%d reserved pages\n", total_reserved); + printk(KERN_INFO "Total of %ld pages in page table cache\n", + quicklist_total_size()); + printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages()); +} diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index 3cd6288f65c..11fa51c8961 100644 --- a/arch/metag/mm/init.c +++ 
b/arch/metag/mm/init.c @@ -204,7 +204,8 @@ static void __init do_init_bootmem(void) start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), 0); + PFN_PHYS(end_pfn - start_pfn), + &memblock.memory, 0); } /* All of system RAM sits in node 0 for the non-NUMA case */ diff --git a/arch/metag/mm/numa.c b/arch/metag/mm/numa.c index b172aa45fcf..67b46c29507 100644 --- a/arch/metag/mm/numa.c +++ b/arch/metag/mm/numa.c @@ -42,7 +42,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) memblock_add(start, end - start); memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), nid); + PFN_PHYS(end_pfn - start_pfn), + &memblock.memory, nid); /* Node-local pgdat */ pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data), diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 74c7bcc1e82..89077d34671 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -192,7 +192,8 @@ void __init setup_memory(void) start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); memblock_set_node(start_pfn << PAGE_SHIFT, - (end_pfn - start_pfn) << PAGE_SHIFT, 0); + (end_pfn - start_pfn) << PAGE_SHIFT, + &memblock.memory, 0); } /* free bootmem is whole main memory */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 96f8168cf4e..ae085ad0fba 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -645,55 +645,30 @@ EXPORT_SYMBOL(empty_zero_page); void show_mem(unsigned int filter) { - int i,free = 0,total = 0,reserved = 0; - int shared = 0, cached = 0; + int total = 0,reserved = 0; + pg_data_t *pgdat; printk(KERN_INFO "Mem-info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; -#ifndef CONFIG_DISCONTIGMEM - i = max_mapnr; - while (i-- > 0) { - total++; - if (PageReserved(mem_map+i)) - reserved++; - else if 
(PageSwapCache(mem_map+i)) - cached++; - else if (!page_count(&mem_map[i])) - free++; - else - shared += page_count(&mem_map[i]) - 1; - } -#else - for (i = 0; i < npmem_ranges; i++) { - int j; - for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { - struct page *p; - unsigned long flags; - - pgdat_resize_lock(NODE_DATA(i), &flags); - p = nid_page_nr(i, j) - node_start_pfn(i); - - total++; - if (PageReserved(p)) - reserved++; - else if (PageSwapCache(p)) - cached++; - else if (!page_count(p)) - free++; - else - shared += page_count(p) - 1; - pgdat_resize_unlock(NODE_DATA(i), &flags); - } + for_each_online_pgdat(pgdat) { + unsigned long flags; + int zoneid; + + pgdat_resize_lock(pgdat, &flags); + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + struct zone *zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + total += zone->present_pages; + reserved = zone->present_pages - zone->managed_pages; + } + pgdat_resize_unlock(pgdat, &flags); } -#endif + printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d reserved pages\n", reserved); - printk(KERN_INFO "%d pages shared\n", shared); - printk(KERN_INFO "%d pages swap cached\n", cached); - #ifdef CONFIG_DISCONTIGMEM { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3fa93dc7fe7..8c1dd23652a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -209,7 +209,7 @@ void __init do_init_bootmem(void) /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); /* Add all physical memory to the bootmem map, mark each area * present. 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 078d3e00a61..5a944f25e94 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -670,7 +670,8 @@ static void __init parse_drconf_memory(struct device_node *memory) node_set_online(nid); sz = numa_enforce_memory_limit(base, size); if (sz) - memblock_set_node(base, sz, nid); + memblock_set_node(base, sz, + &memblock.memory, nid); } while (--ranges); } } @@ -760,7 +761,7 @@ new_range: continue; } - memblock_set_node(start, size, nid); + memblock_set_node(start, size, &memblock.memory, nid); if (--ranges) goto new_range; @@ -797,7 +798,8 @@ static void __init setup_nonnuma(void) fake_numa_create_new_node(end_pfn, &nid); memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), nid); + PFN_PHYS(end_pfn - start_pfn), + &memblock.memory, nid); node_set_online(nid); } } diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 305f7ee1f38..c75d06aa27c 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -2,7 +2,6 @@ menu "Machine selection" config SCORE def_bool y - select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_ATOMIC64 diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 38b313909ac..adad46e41a1 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -13,6 +13,7 @@ #include <linux/kdebug.h> #include <linux/irq.h> #include <linux/io.h> +#include <linux/sched.h> #include <asm/cacheflush.h> #include <asm/traps.h> diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 1cf90e947db..de19cfa768f 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -230,8 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, pmb_bolt_mapping((unsigned long)__va(start), start, end - start, PAGE_KERNEL); - memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), nid); + memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), + &memblock.memory, nid); 
} void __init __weak plat_early_device_setup(void) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 5322e530d09..eafbc65c9c4 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1021,7 +1021,8 @@ static void __init add_node_ranges(void) "start[%lx] end[%lx]\n", nid, start, this_end); - memblock_set_node(start, this_end - start, nid); + memblock_set_node(start, this_end - start, + &memblock.memory, nid); start = this_end; } } @@ -1325,7 +1326,7 @@ static void __init bootmem_init_nonnuma(void) (top_of_ram - total_ram) >> 20); init_node_masks_nonnuma(); - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); allocate_node_data(0); node_set_online(0); } diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index ae6bc036db9..be2bde9b07c 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -66,9 +66,6 @@ void show_mem(unsigned int filter) printk(KERN_DEFAULT "Mem-info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_bank(i, mi) { struct membank *bank = &mi->bank[i]; unsigned int pfn1, pfn2; diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb6..2f59cce3b38 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; -static inline phys_addr_t get_max_mapped(void) +static inline phys_addr_t get_max_low_mapped(void) { - return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; + return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT; } bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index e2dbcb7dabd..83a7995625a 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -91,7 +91,7 @@ void 
__init setup_bios_corruption_check(void) corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); - for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { + for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) { start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), PAGE_SIZE, corruption_check_size); end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 174da5fc5a7..988c00a1f60 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void) nr_pages += end_pfn - start_pfn; } - for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { + for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) { start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); if (start_pfn < end_pfn) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 06853e67035..c9675594d7c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); - memblock_set_current_limit(get_max_mapped()); + memblock_set_current_limit(get_max_low_mapped()); dma_contiguous_reserve(0); /* diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5bdc5430597..e39504878ae 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -665,7 +665,7 @@ void __init initmem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); sparse_memory_present_with_active_regions(0); #ifdef CONFIG_FLATMEM diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a9245..f35c66c5959 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start, #ifndef CONFIG_NUMA 
void __init initmem_init(void) { - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); } #endif diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 8dabbed409e..1e9da795767 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) u64 i; phys_addr_t this_start, this_end; - for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { + for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { this_start = clamp_t(phys_addr_t, this_start, start, end); this_end = clamp_t(phys_addr_t, this_end, start, end); if (this_start < this_end) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index c85da7bb6b6..81b2750f366 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *mb = &mi->blk[i]; - memblock_set_node(mb->start, mb->end - mb->start, mb->nid); + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.memory, mb->nid); + + /* + * At this time, all memory regions reserved by memblock are + * used by the kernel. Set the nid in memblock.reserved will + * mark out all the nodes the kernel resides in. + */ + memblock_set_node(mb->start, mb->end - mb->start, + &memblock.reserved, mb->nid); } /* @@ -553,6 +562,30 @@ static void __init numa_init_array(void) } } +static void __init numa_clear_kernel_node_hotplug(void) +{ + int i, nid; + nodemask_t numa_kernel_nodes; + unsigned long start, end; + struct memblock_type *type = &memblock.reserved; + + /* Mark all kernel nodes. */ + for (i = 0; i < type->cnt; i++) + node_set(type->regions[i].nid, numa_kernel_nodes); + + /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. 
*/ + for (i = 0; i < numa_meminfo.nr_blks; i++) { + nid = numa_meminfo.blk[i].nid; + if (!node_isset(nid, numa_kernel_nodes)) + continue; + + start = numa_meminfo.blk[i].start; + end = numa_meminfo.blk[i].end; + + memblock_clear_hotplug(start, end - start); + } +} + static int __init numa_init(int (*init_func)(void)) { int i; @@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void)) nodes_clear(node_possible_map); nodes_clear(node_online_map); memset(&numa_meminfo, 0, sizeof(numa_meminfo)); - WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); + WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, + MAX_NUMNODES)); + WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, + MAX_NUMNODES)); + /* In case that parsing SRAT failed. */ + WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); numa_reset_distance(); ret = init_func(); @@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void)) numa_clear_node(i); } numa_init_array(); + + /* + * At very early time, the kernel have to use some memory such as + * loading the kernel image. We cannot prevent this anyway. So any + * node the kernel resides in should be un-hotpluggable. + * + * And when we come here, numa_init() won't fail. + */ + numa_clear_kernel_node_hotplug(); + return 0; } diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 266ca912f62..1a25187e151 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -181,6 +181,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) (unsigned long long) start, (unsigned long long) end - 1, hotpluggable ? " hotplug" : ""); + /* Mark hotplug range in memblock. */ + if (hotpluggable && memblock_mark_hotplug(start, ma->length)) + pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", + (unsigned long long)start, (unsigned long long)end - 1); + return 0; out_err_bad_srat: bad_srat(); |