From 6b74ab97bc12ce74acec900f1d89a4aee2e4d70d Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 23 Jul 2008 21:26:49 -0700
Subject: mm: add a basic debugging framework for memory initialisation

Boot initialisation is very complex, with significant numbers of
architecture-specific routines, hooks and code ordering. While significant
amounts of the initialisation are architecture-independent, the code trusts
the data received from the architecture layer. This is a mistake, and has
resulted in a number of difficult-to-diagnose bugs.

This patchset adds some validation and tracing to memory initialisation. It
also introduces a few basic defensive measures. The validation code can be
explicitly disabled for embedded systems.

This patch:

Add additional debugging and verification code for memory initialisation.

Once enabled, the verification checks are always run and, when required,
additional debugging information may be output via an mminit_loglevel=
command-line parameter.

The verification code is placed in a new file mm/mm_init.c. Ideally other mm
initialisation code will be moved here over time.

Signed-off-by: Mel Gorman
Cc: Christoph Lameter
Cc: Andy Whitcroft
Cc: Ingo Molnar
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 79ac4afc908..0908352ba72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2975,7 +2975,8 @@ void __init sparse_memory_present_with_active_regions(int nid)
 void __init push_node_boundaries(unsigned int nid,
         unsigned long start_pfn, unsigned long end_pfn)
 {
-    printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n",
+    mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+            "Entering push_node_boundaries(%u, %lu, %lu)\n",
             nid, start_pfn, end_pfn);
 
     /* Initialise the boundary for this node if necessary */
@@ -2993,7 +2994,8 @@ void __init push_node_boundaries(unsigned int nid,
 static void __meminit account_node_boundary(unsigned int nid,
         unsigned long *start_pfn, unsigned long *end_pfn)
 {
-    printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n",
+    mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+            "Entering account_node_boundary(%u, %lu, %lu)\n",
             nid, *start_pfn, *end_pfn);
 
     /* Return if boundary information has not been provided */
@@ -3368,8 +3370,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
             PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
         if (realsize >= memmap_pages) {
             realsize -= memmap_pages;
-            printk(KERN_DEBUG
-                " %s zone: %lu pages used for memmap\n",
+            mminit_dprintk(MMINIT_TRACE, "memmap_init",
+                "%s zone: %lu pages used for memmap\n",
                 zone_names[j], memmap_pages);
         } else
             printk(KERN_WARNING
@@ -3379,7 +3381,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
         /* Account for reserved pages */
         if (j == 0 && realsize > dma_reserve) {
             realsize -= dma_reserve;
-            printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
+            mminit_dprintk(MMINIT_TRACE, "memmap_init",
+                "%s zone: %lu pages reserved\n",
                 zone_names[0], dma_reserve);
         }
 
@@ -3520,10 +3523,11 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 {
     int i;
 
-    printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) "
-            "%d entries of %d used\n",
-            nid, start_pfn, end_pfn,
-            nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+    mminit_dprintk(MMINIT_TRACE, "memory_register",
+            "Entering add_active_range(%d, %#lx, %#lx) "
+            "%d entries of %d used\n",
+            nid, start_pfn, end_pfn,
+            nr_nodemap_entries, MAX_ACTIVE_REGIONS);
 
     /* Merge with existing active regions if possible */
     for (i = 0; i < nr_nodemap_entries; i++) {
--
cgit v1.2.3-70-g09d2
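For orientation, the interface this patch introduces has roughly the following
shape (a simplified sketch of what lands in mm/internal.h behind
CONFIG_DEBUG_MEMORY_INIT, not the verbatim source):

    /* Message classes, most to least important */
    enum mminit_level {
        MMINIT_WARNING,    /* likely bugs, always worth seeing */
        MMINIT_VERIFY,     /* results of the verification checks */
        MMINIT_TRACE       /* verbose tracing of initialisation */
    };

    extern int mminit_loglevel;

    /* Emit a message only if its level is below the configured loglevel */
    #define mminit_dprintk(level, prefix, fmt, arg...)          \
    do {                                                        \
        if (level < mminit_loglevel) {                          \
            printk(level <= MMINIT_WARNING ?                    \
                    KERN_WARNING : KERN_DEBUG);                 \
            printk(prefix ": " fmt, ##arg);                     \
        }                                                       \
    } while (0)

Booting with something like mminit_loglevel=4 on the kernel command line then
enables everything up to and including the MMINIT_TRACE messages used
throughout this series; the verification BUG_ON/WARN checks added by the later
patches run regardless of the loglevel.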
From 708614e6180f398cd307ea0048d48ba6fa274610 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 23 Jul 2008 21:26:51 -0700
Subject: mm: verify the page links and memory model

Print out information on how the page flags are being used if
mminit_loglevel is MMINIT_VERIFY or higher, and unconditionally perform
sanity checks on the flags regardless of loglevel.

When the page flags are updated with section, node and zone information, a
check is made to ensure the values can be retrieved correctly. Finally, we
confirm that pfn_to_page and page_to_pfn are the correct inverse functions.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Mel Gorman
Cc: Christoph Lameter
Cc: Andy Whitcroft
Cc: Ingo Molnar
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/internal.h   | 12 ++++++++++
 mm/mm_init.c    | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |  8 +++++++
 3 files changed, 91 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/internal.h b/mm/internal.h
index a7ee0525329..7a4a2885dc8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -78,6 +78,10 @@ do { \
     } \
 } while (0)
 
+extern void mminit_verify_pageflags_layout(void);
+extern void mminit_verify_page_links(struct page *page,
+        enum zone_type zone, unsigned long nid, unsigned long pfn);
+
 #else
 
 static inline void mminit_dprintk(enum mminit_level level,
@@ -85,5 +89,13 @@ static inline void mminit_dprintk(enum mminit_level level,
 {
 }
 
+static inline void mminit_verify_pageflags_layout(void)
+{
+}
+
+static inline void mminit_verify_page_links(struct page *page,
+        enum zone_type zone, unsigned long nid, unsigned long pfn)
+{
+}
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 #endif
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c01d8dfec81..e16990d629e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -7,9 +7,80 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include "internal.h"
 
 int __meminitdata mminit_loglevel;
 
+void __init mminit_verify_pageflags_layout(void)
+{
+    int shift, width;
+    unsigned long or_mask, add_mask;
+
+    shift = 8 * sizeof(unsigned long);
+    width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH;
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
+        "Section %d Node %d Zone %d Flags %d\n",
+        SECTIONS_WIDTH,
+        NODES_WIDTH,
+        ZONES_WIDTH,
+        NR_PAGEFLAGS);
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
+        "Section %d Node %d Zone %d\n",
+#ifdef SECTIONS_SHIFT
+        SECTIONS_SHIFT,
+#else
+        0,
+#endif
+        NODES_SHIFT,
+        ZONES_SHIFT);
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_offsets",
+        "Section %lu Node %lu Zone %lu\n",
+        (unsigned long)SECTIONS_PGSHIFT,
+        (unsigned long)NODES_PGSHIFT,
+        (unsigned long)ZONES_PGSHIFT);
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_zoneid",
+        "Zone ID: %lu -> %lu\n",
+        (unsigned long)ZONEID_PGOFF,
+        (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT));
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_usage",
+        "location: %d -> %d unused %d -> %d flags %d -> %d\n",
+        shift, width, width, NR_PAGEFLAGS, NR_PAGEFLAGS, 0);
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+    mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
+        "Node not in page flags");
+#endif
+
+    if (SECTIONS_WIDTH) {
+        shift -= SECTIONS_WIDTH;
+        BUG_ON(shift != SECTIONS_PGSHIFT);
+    }
+    if (NODES_WIDTH) {
+        shift -= NODES_WIDTH;
+        BUG_ON(shift != NODES_PGSHIFT);
+    }
+    if (ZONES_WIDTH) {
+        shift -= ZONES_WIDTH;
+        BUG_ON(shift != ZONES_PGSHIFT);
+    }
+
+    /* Check for bitmask overlaps */
+    or_mask = (ZONES_MASK << ZONES_PGSHIFT) |
+            (NODES_MASK << NODES_PGSHIFT) |
+            (SECTIONS_MASK << SECTIONS_PGSHIFT);
+    add_mask = (ZONES_MASK << ZONES_PGSHIFT) +
+            (NODES_MASK << NODES_PGSHIFT) +
+            (SECTIONS_MASK << SECTIONS_PGSHIFT);
+    BUG_ON(or_mask != add_mask);
+}
+
+void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
+            unsigned long nid, unsigned long pfn)
+{
+    BUG_ON(page_to_nid(page) != nid);
+    BUG_ON(page_zonenum(page) != zone);
+    BUG_ON(page_to_pfn(page) != pfn);
+}
+
 static __init int set_mminit_loglevel(char *str)
 {
     get_option(&str, &mminit_loglevel);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0908352ba72..acab6ad326d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2534,6 +2534,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         }
         page = pfn_to_page(pfn);
         set_page_links(page, zone, nid, pfn);
+        mminit_verify_page_links(page, zone, nid, pfn);
         init_page_count(page);
         reset_page_mapcount(page);
         SetPageReserved(page);
@@ -2836,6 +2837,12 @@ __meminit int init_currently_empty_zone(struct zone *zone,
 
     zone->zone_start_pfn = zone_start_pfn;
 
+    mminit_dprintk(MMINIT_TRACE, "memmap_init",
+            "Initialising map node %d zone %lu pfns %lu -> %lu\n",
+            pgdat->node_id,
+            (unsigned long)zone_idx(zone),
+            zone_start_pfn, (zone_start_pfn + size));
+
     zone_init_free_lists(zone);
 
     return 0;
@@ -3961,6 +3968,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                         early_node_map[i].end_pfn);
 
     /* Initialise every node */
+    mminit_verify_pageflags_layout();
     setup_nr_node_ids();
     for_each_online_node(nid) {
         pg_data_t *pgdat = NODE_DATA(nid);
--
cgit v1.2.3-70-g09d2
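The final BUG_ON above encodes a trick worth spelling out: for bitfields that
do not overlap, bitwise OR and integer addition produce identical results,
while any shared bit makes the addition carry and the two diverge. A tiny
standalone illustration (not kernel code; the field positions are made up):

    #include <assert.h>

    int main(void)
    {
        /* disjoint "fields": OR and ADD agree */
        unsigned long zones = 0x3UL << 8;     /* bits 8-9 */
        unsigned long nodes = 0x3fUL << 10;   /* bits 10-15 */
        assert((zones | nodes) == (zones + nodes));

        /* overlapping fields: the carry exposes the bug */
        unsigned long bad = 0x3UL << 9;       /* bits 9-10, shares bit 9 */
        assert((zones | bad) != (zones + bad));
        return 0;
    }

So a single equality comparison of or_mask and add_mask is enough to prove
that the zone, node and section masks never claim the same bit of page->flags.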
From 2dbb51c49f4fecb8330e43247a0edfbc4b2b8974 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 23 Jul 2008 21:26:52 -0700
Subject: mm: make defensive checks around PFN values registered for memory usage

There are a number of different views of how much memory is currently
active: the arch-independent zone-sizing view, the bootmem allocator's view
and the memory model's view. Architectures register this information at
different times, and it is not necessarily in sync, particularly with
respect to some SPARSEMEM limitations.

This patch introduces mminit_validate_memmodel_limits() which is able to
validate and correct PFN ranges with respect to the memory model. It is
only SPARSEMEM that currently validates itself.
Signed-off-by: Mel Gorman
Cc: Christoph Lameter
Cc: Andy Whitcroft
Cc: Ingo Molnar
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/bootmem.c    |  1 +
 mm/internal.h   | 12 ++++++++++++
 mm/page_alloc.c |  2 ++
 mm/sparse.c     | 37 +++++++++++++++++++++++++++++--------
 4 files changed, 44 insertions(+), 8 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8d9f60e06f6..9f4bbc5da73 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -91,6 +91,7 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
     bootmem_data_t *bdata = pgdat->bdata;
     unsigned long mapsize;
 
+    mminit_validate_memmodel_limits(&start, &end);
     bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
     bdata->node_boot_start = PFN_PHYS(start);
     bdata->node_low_pfn = end;
diff --git a/mm/internal.h b/mm/internal.h
index 7a4a2885dc8..5d17f3efac4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -98,4 +98,16 @@ static inline void mminit_verify_page_links(struct page *page,
 {
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
+
+/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
+#if defined(CONFIG_SPARSEMEM)
+extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
+        unsigned long *end_pfn);
+#else
+static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
+        unsigned long *end_pfn)
+{
+}
+#endif /* CONFIG_SPARSEMEM */
+
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index acab6ad326d..0adb66e711e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3536,6 +3536,8 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
             nid, start_pfn, end_pfn,
             nr_nodemap_entries, MAX_ACTIVE_REGIONS);
 
+    mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
+
     /* Merge with existing active regions if possible */
     for (i = 0; i < nr_nodemap_entries; i++) {
         if (early_node_map[i].nid != nid)
diff --git a/mm/sparse.c b/mm/sparse.c
index 36511c7b5e2..7a3650923d9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include "internal.h"
 
 /*
  * Permanent SPARSEMEM data:
@@ -147,22 +148,41 @@ static inline int sparse_early_nid(struct mem_section *section)
     return (section->section_mem_map >> SECTION_NID_SHIFT);
 }
 
-/* Record a memory area against a node. */
-void __init memory_present(int nid, unsigned long start, unsigned long end)
+/* Validate the physical addressing limitations of the model */
+void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
+                        unsigned long *end_pfn)
 {
-    unsigned long max_arch_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
-    unsigned long pfn;
+    unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
 
     /*
      * Sanity checks - do not allow an architecture to pass
      * in larger pfns than the maximum scope of sparsemem:
      */
-    if (start >= max_arch_pfn)
-        return;
-    if (end >= max_arch_pfn)
-        end = max_arch_pfn;
+    if (*start_pfn > max_sparsemem_pfn) {
+        mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
+            "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+            *start_pfn, *end_pfn, max_sparsemem_pfn);
+        WARN_ON_ONCE(1);
+        *start_pfn = max_sparsemem_pfn;
+        *end_pfn = max_sparsemem_pfn;
+    }
+
+    if (*end_pfn > max_sparsemem_pfn) {
+        mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
+            "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
+            *start_pfn, *end_pfn, max_sparsemem_pfn);
+        WARN_ON_ONCE(1);
+        *end_pfn = max_sparsemem_pfn;
+    }
+}
+
+/* Record a memory area against a node. */
+void __init memory_present(int nid, unsigned long start, unsigned long end)
+{
+    unsigned long pfn;
 
     start &= PAGE_SECTION_MASK;
+    mminit_validate_memmodel_limits(&start, &end);
     for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
         unsigned long section = pfn_to_section_nr(pfn);
         struct mem_section *ms;
@@ -187,6 +207,7 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
     unsigned long pfn;
     unsigned long nr_pages = 0;
 
+    mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
     for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
         if (nid != early_pfn_to_nid(pfn))
             continue;
--
cgit v1.2.3-70-g09d2
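The limit being enforced is simple arithmetic: the largest pfn SPARSEMEM can
describe is 1 << (MAX_PHYSMEM_BITS - PAGE_SHIFT). A small userspace sketch of
the clamping behaviour, using hypothetical configuration values (assumes a
64-bit host; the constants below are illustrative, not from any particular
architecture):

    #include <stdio.h>

    #define MAX_PHYSMEM_BITS 44
    #define PAGE_SHIFT       12    /* 4KiB pages -> max pfn is 2^32 */

    static void validate(unsigned long *start_pfn, unsigned long *end_pfn)
    {
        unsigned long max_pfn = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);

        if (*start_pfn > max_pfn)          /* whole range out of scope */
            *start_pfn = *end_pfn = max_pfn;
        if (*end_pfn > max_pfn)            /* clamp the tail */
            *end_pfn = max_pfn;
    }

    int main(void)
    {
        unsigned long start = 0x80000000UL, end = 0x500000000UL;

        validate(&start, &end);
        printf("%#lx -> %#lx\n", start, end);  /* end clamped to 0x100000000 */
        return 0;
    }

Unlike the old code in memory_present(), which silently truncated, the kernel
version also logs an MMINIT_WARNING and fires WARN_ON_ONCE() so the broken
architecture registration is visible.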
From 68ad8df42e12037c3894c9706ab428bf5cd6426b Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 23 Jul 2008 21:26:52 -0700
Subject: mm: print out the zonelists on request for manual verification

This patch prints out the zonelists during boot for manual verification by
the user if the mminit_loglevel is MMINIT_VERIFY or higher.

Signed-off-by: Mel Gorman
Cc: Christoph Lameter
Cc: Andy Whitcroft
Cc: Ingo Molnar
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/internal.h   |  5 +++++
 mm/mm_init.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c |  1 +
 3 files changed, 51 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/internal.h b/mm/internal.h
index 5d17f3efac4..50807e12490 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -81,6 +81,7 @@ do { \
 extern void mminit_verify_pageflags_layout(void);
 extern void mminit_verify_page_links(struct page *page,
         enum zone_type zone, unsigned long nid, unsigned long pfn);
+extern void mminit_verify_zonelist(void);
 
 #else
 
@@ -97,6 +98,10 @@ static inline void mminit_verify_page_links(struct page *page,
         enum zone_type zone, unsigned long nid, unsigned long pfn)
 {
 }
+
+static inline void mminit_verify_zonelist(void)
+{
+}
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index e16990d629e..ce445ca097e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,51 @@
 
 int __meminitdata mminit_loglevel;
 
+/* The zonelists are simply reported, validation is manual. */
+void mminit_verify_zonelist(void)
+{
+    int nid;
+
+    if (mminit_loglevel < MMINIT_VERIFY)
+        return;
+
+    for_each_online_node(nid) {
+        pg_data_t *pgdat = NODE_DATA(nid);
+        struct zone *zone;
+        struct zoneref *z;
+        struct zonelist *zonelist;
+        int i, listid, zoneid;
+
+        BUG_ON(MAX_ZONELISTS > 2);
+        for (i = 0; i < MAX_ZONELISTS * MAX_NR_ZONES; i++) {
+
+            /* Identify the zone and nodelist */
+            zoneid = i % MAX_NR_ZONES;
+            listid = i / MAX_NR_ZONES;
+            zonelist = &pgdat->node_zonelists[listid];
+            zone = &pgdat->node_zones[zoneid];
+            if (!populated_zone(zone))
+                continue;
+
+            /* Print information about the zonelist */
+            printk(KERN_DEBUG "mminit::zonelist %s %d:%s = ",
"thisnode" : "general", nid, + zone->name); + + /* Iterate the zonelist */ + for_each_zone_zonelist(zone, z, zonelist, zoneid) { +#ifdef CONFIG_NUMA + printk(KERN_CONT "%d:%s ", + zone->node, zone->name); +#else + printk(KERN_CONT "0:%s ", zone->name); +#endif /* CONFIG_NUMA */ + } + printk(KERN_CONT "\n"); + } + } +} + void __init mminit_verify_pageflags_layout(void) { int shift, width; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0adb66e711e..9ece07ce65b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2352,6 +2352,7 @@ void build_all_zonelists(void) if (system_state == SYSTEM_BOOTING) { __build_all_zonelists(NULL); + mminit_verify_zonelist(); cpuset_init_current_mems_allowed(); } else { /* we have to stop all cpus to guarantee there is no user -- cgit v1.2.3-70-g09d2 From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:26:55 -0700 Subject: mm: move bootmem descriptors definition to a single place There are a lot of places that define either a single bootmem descriptor or an array of them. Use only one central array with MAX_NUMNODES items instead. Signed-off-by: Johannes Weiner Acked-by: Ralf Baechle Cc: Ingo Molnar Cc: Richard Henderson Cc: Russell King Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Kyle McMartin Cc: Paul Mackerras Cc: Paul Mundt Cc: David S. Miller Cc: Yinghai Lu Cc: Christoph Lameter Cc: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/numa.c | 8 ++++---- arch/arm/mm/discontig.c | 34 ++++++++++++++++------------------ arch/ia64/mm/discontig.c | 11 +++++------ arch/m32r/mm/discontig.c | 4 +--- arch/m68k/mm/init.c | 4 +--- arch/mips/sgi-ip27/ip27-memory.c | 4 +--- arch/parisc/mm/init.c | 3 +-- arch/powerpc/mm/numa.c | 3 +-- arch/sh/mm/numa.c | 5 ++--- arch/sparc64/mm/init.c | 3 +-- arch/x86/mm/discontig_32.c | 3 +-- arch/x86/mm/numa_64.c | 4 +--- include/linux/bootmem.h | 2 ++ mm/bootmem.c | 2 ++ mm/page_alloc.c | 4 +--- 15 files changed, 40 insertions(+), 54 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 10ab7833e83..a53fda0481c 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -19,7 +19,6 @@ #include pg_data_t node_data[MAX_NUMNODES]; -bootmem_data_t node_bdata[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); #undef DEBUG_DISCONTIG @@ -141,7 +140,7 @@ setup_memory_node(int nid, void *kernel_end) printk(" not enough mem to reserve NODE_DATA"); return; } - NODE_DATA(nid)->bdata = &node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; printk(" Detected node memory: start %8lu, end %8lu\n", node_min_pfn, node_max_pfn); @@ -304,8 +303,9 @@ void __init paging_init(void) dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; for_each_online_node(nid) { - unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT; - unsigned long end_pfn = node_bdata[nid].node_low_pfn; + bootmem_data_t *bdata = &bootmem_node_data[nid]; + unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT; + unsigned long end_pfn = bdata->node_low_pfn; if (dma_local_pfn >= end_pfn - start_pfn) zones_size[ZONE_DMA] = end_pfn - start_pfn; diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c index 1e560218950..c8c0c4b0f0a 100644 --- a/arch/arm/mm/discontig.c +++ b/arch/arm/mm/discontig.c @@ -21,26 +21,24 @@ * Our node_data structure for discontiguous memory. 
From b61bfa3c462671c48a51fb5c31af337c5a996a04 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Wed, 23 Jul 2008 21:26:55 -0700
Subject: mm: move bootmem descriptors definition to a single place

There are a lot of places that define either a single bootmem descriptor or
an array of them. Use only one central array with MAX_NUMNODES items
instead.

Signed-off-by: Johannes Weiner
Acked-by: Ralf Baechle
Cc: Ingo Molnar
Cc: Richard Henderson
Cc: Russell King
Cc: Tony Luck
Cc: Hirokazu Takata
Cc: Geert Uytterhoeven
Cc: Kyle McMartin
Cc: Paul Mackerras
Cc: Paul Mundt
Cc: David S. Miller
Cc: Yinghai Lu
Cc: Christoph Lameter
Cc: Mel Gorman
Cc: Andy Whitcroft
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/alpha/mm/numa.c             |  8 ++++----
 arch/arm/mm/discontig.c          | 34 ++++++++++++++++------------------
 arch/ia64/mm/discontig.c         | 11 +++++------
 arch/m32r/mm/discontig.c         |  4 +---
 arch/m68k/mm/init.c              |  4 +---
 arch/mips/sgi-ip27/ip27-memory.c |  4 +---
 arch/parisc/mm/init.c            |  3 +--
 arch/powerpc/mm/numa.c           |  3 +--
 arch/sh/mm/numa.c                |  5 ++---
 arch/sparc64/mm/init.c           |  3 +--
 arch/x86/mm/discontig_32.c       |  3 +--
 arch/x86/mm/numa_64.c            |  4 +---
 include/linux/bootmem.h          |  2 ++
 mm/bootmem.c                     |  2 ++
 mm/page_alloc.c                  |  4 +---
 15 files changed, 40 insertions(+), 54 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 10ab7833e83..a53fda0481c 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -19,7 +19,6 @@
 #include
 
 pg_data_t node_data[MAX_NUMNODES];
-bootmem_data_t node_bdata[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
 
 #undef DEBUG_DISCONTIG
@@ -141,7 +140,7 @@ setup_memory_node(int nid, void *kernel_end)
         printk(" not enough mem to reserve NODE_DATA");
         return;
     }
-    NODE_DATA(nid)->bdata = &node_bdata[nid];
+    NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 
     printk(" Detected node memory:   start %8lu, end %8lu\n",
            node_min_pfn, node_max_pfn);
@@ -304,8 +303,9 @@ void __init paging_init(void)
     dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
     for_each_online_node(nid) {
-        unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT;
-        unsigned long end_pfn = node_bdata[nid].node_low_pfn;
+        bootmem_data_t *bdata = &bootmem_node_data[nid];
+        unsigned long start_pfn = bdata->node_boot_start >> PAGE_SHIFT;
+        unsigned long end_pfn = bdata->node_low_pfn;
 
         if (dma_local_pfn >= end_pfn - start_pfn)
             zones_size[ZONE_DMA] = end_pfn - start_pfn;
diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c
index 1e560218950..c8c0c4b0f0a 100644
--- a/arch/arm/mm/discontig.c
+++ b/arch/arm/mm/discontig.c
@@ -21,26 +21,24 @@
  * Our node_data structure for discontiguous memory.
  */
-static bootmem_data_t node_bootmem_data[MAX_NUMNODES];
-
 pg_data_t discontig_node_data[MAX_NUMNODES] = {
-    { .bdata = &node_bootmem_data[0] },
-    { .bdata = &node_bootmem_data[1] },
-    { .bdata = &node_bootmem_data[2] },
-    { .bdata = &node_bootmem_data[3] },
+    { .bdata = &bootmem_node_data[0] },
+    { .bdata = &bootmem_node_data[1] },
+    { .bdata = &bootmem_node_data[2] },
+    { .bdata = &bootmem_node_data[3] },
 #if MAX_NUMNODES == 16
-    { .bdata = &node_bootmem_data[4] },
-    { .bdata = &node_bootmem_data[5] },
-    { .bdata = &node_bootmem_data[6] },
-    { .bdata = &node_bootmem_data[7] },
-    { .bdata = &node_bootmem_data[8] },
-    { .bdata = &node_bootmem_data[9] },
-    { .bdata = &node_bootmem_data[10] },
-    { .bdata = &node_bootmem_data[11] },
-    { .bdata = &node_bootmem_data[12] },
-    { .bdata = &node_bootmem_data[13] },
-    { .bdata = &node_bootmem_data[14] },
-    { .bdata = &node_bootmem_data[15] },
+    { .bdata = &bootmem_node_data[4] },
+    { .bdata = &bootmem_node_data[5] },
+    { .bdata = &bootmem_node_data[6] },
+    { .bdata = &bootmem_node_data[7] },
+    { .bdata = &bootmem_node_data[8] },
+    { .bdata = &bootmem_node_data[9] },
+    { .bdata = &bootmem_node_data[10] },
+    { .bdata = &bootmem_node_data[11] },
+    { .bdata = &bootmem_node_data[12] },
+    { .bdata = &bootmem_node_data[13] },
+    { .bdata = &bootmem_node_data[14] },
+    { .bdata = &bootmem_node_data[15] },
 #endif
 };
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 544dc420c65..2fcf8464331 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -36,7 +36,6 @@ struct early_node_data {
     struct ia64_node_data *node_data;
     unsigned long pernode_addr;
     unsigned long pernode_size;
-    struct bootmem_data bootmem_data;
     unsigned long num_physpages;
 #ifdef CONFIG_ZONE_DMA
     unsigned long num_dma_physpages;
@@ -76,7 +75,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
     int node)
 {
     unsigned long cstart, epfn, end = start + len;
-    struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+    struct bootmem_data *bdp = &bootmem_node_data[node];
 
     epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
     cstart = GRANULEROUNDDOWN(start);
@@ -167,7 +166,7 @@ static void __init fill_pernode(int node, unsigned long pernode,
 {
     void *cpu_data;
     int cpus = early_nr_cpus_node(node);
-    struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+    struct bootmem_data *bdp = &bootmem_node_data[node];
 
     mem_data[node].pernode_addr = pernode;
     mem_data[node].pernode_size = pernodesize;
@@ -224,7 +223,7 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 {
     unsigned long epfn;
     unsigned long pernodesize = 0, pernode, pages, mapsize;
-    struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+    struct bootmem_data *bdp = &bootmem_node_data[node];
 
     epfn = (start + len) >> PAGE_SHIFT;
 
@@ -440,7 +439,7 @@ void __init find_memory(void)
     efi_memmap_walk(find_max_min_low_pfn, NULL);
 
     for_each_online_node(node)
-        if (mem_data[node].bootmem_data.node_low_pfn) {
+        if (bootmem_node_data[node].node_low_pfn) {
             node_clear(node, memory_less_mask);
             mem_data[node].min_pfn = ~0UL;
         }
@@ -460,7 +459,7 @@ void __init find_memory(void)
         else if (node_isset(node, memory_less_mask))
             continue;
 
-        bdp = &mem_data[node].bootmem_data;
+        bdp = &bootmem_node_data[node];
         pernode = mem_data[node].pernode_addr;
         pernodesize = mem_data[node].pernode_size;
         map = pernode + pernodesize;
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index 07c1af7dc0e..aa9145ef6cc 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -20,7 +20,6 @@
 extern char _end[];
 
 struct pglist_data *node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node_bdata[MAX_NUMNODES] __initdata;
 
 pg_data_t m32r_node_data[MAX_NUMNODES];
 
@@ -81,7 +80,7 @@ unsigned long __init setup_memory(void)
     for_each_online_node(nid) {
         mp = &mem_prof[nid];
         NODE_DATA(nid)=(pg_data_t *)&m32r_node_data[nid];
-        NODE_DATA(nid)->bdata = &node_bdata[nid];
+        NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
         min_pfn = mp->start_pfn;
         max_pfn = mp->start_pfn + mp->pages;
         bootmap_size = init_bootmem_node(NODE_DATA(nid), mp->free_pfn,
@@ -163,4 +162,3 @@ unsigned long __init zone_sizes_init(void)
 
     return holes;
 }
-
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index d8fb9c5303c..79f5f94d480 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -32,8 +32,6 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-static bootmem_data_t __initdata bootmem_data[MAX_NUMNODES];
-
 pg_data_t pg_data_map[MAX_NUMNODES];
 EXPORT_SYMBOL(pg_data_map);
 
@@ -58,7 +56,7 @@ void __init m68k_setup_node(int node)
         pg_data_table[i] = pg_data_map + node;
     }
 #endif
-    pg_data_map[node].bdata = bootmem_data + node;
+    pg_data_map[node].bdata = bootmem_node_data + node;
     node_set_online(node);
 }
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 42cd1095630..060d853d7b3 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -33,8 +33,6 @@
 #define SLOT_PFNSHIFT      (SLOT_SHIFT - PAGE_SHIFT)
 #define PFN_NASIDSHFT      (NASID_SHFT - PAGE_SHIFT)
 
-static struct bootmem_data __initdata plat_node_bdata[MAX_COMPACT_NODES];
-
 struct node_data *__node_data[MAX_COMPACT_NODES];
 
 EXPORT_SYMBOL(__node_data);
 
@@ -403,7 +401,7 @@ static void __init node_mem_init(cnodeid_t node)
      */
     __node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
 
-    NODE_DATA(node)->bdata = &plat_node_bdata[node];
+    NODE_DATA(node)->bdata = &bootmem_node_data[node];
     NODE_DATA(node)->node_start_pfn = start_pfn;
     NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b4d6c8777ed..0ddf4904640 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -36,7 +36,6 @@ extern int data_start;
 
 #ifdef CONFIG_DISCONTIGMEM
 struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly;
 unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
 #endif
 
@@ -262,7 +261,7 @@ static void __init setup_bootmem(void)
 #ifdef CONFIG_DISCONTIGMEM
     for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
         memset(NODE_DATA(i), 0, sizeof(pg_data_t));
-        NODE_DATA(i)->bdata = &bmem_data[i];
+        NODE_DATA(i)->bdata = &bootmem_node_data[i];
     }
     memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf4bffba6f7..d9a18135133 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
@@ -816,7 +815,7 @@ void __init do_init_bootmem(void)
         dbg("node %d\n", nid);
         dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
 
-        NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+        NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
         NODE_DATA(nid)->node_start_pfn = start_pfn;
         NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 1663199ce88..095d93bec7c 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -14,7 +14,6 @@
 #include
 #include
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL_GPL(node_data);
 
@@ -35,7 +34,7 @@ void __init setup_memory(void)
     NODE_DATA(0) = pfn_to_kaddr(free_pfn);
     memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
     free_pfn += PFN_UP(sizeof(struct pglist_data));
-    NODE_DATA(0)->bdata = &plat_node_bdata[0];
+    NODE_DATA(0)->bdata = &bootmem_node_data[0];
 
     /* Set up node 0 */
     setup_bootmem_allocator(free_pfn);
@@ -66,7 +65,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
     free_pfn += PFN_UP(sizeof(struct pglist_data));
     memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-    NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+    NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
     NODE_DATA(nid)->node_start_pfn = start_pfn;
     NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 84898c44dd4..71329747395 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -788,7 +788,6 @@ int numa_cpu_lookup_table[NR_CPUS];
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct mdesc_mblock {
     u64 base;
@@ -871,7 +870,7 @@ static void __init allocate_node_data(int nid)
     NODE_DATA(nid) = __va(paddr);
     memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-    NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+    NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 #endif
 
     p = NODE_DATA(nid);
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 5dfef9fa061..62fa440678d 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -42,7 +42,6 @@
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
-static bootmem_data_t node0_bdata;
 
 /*
  * numa interface - we expect the numa architecture specific code to have
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn,
     for_each_online_node(nid)
         memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
-    NODE_DATA(0)->bdata = &node0_bdata;
+    NODE_DATA(0)->bdata = &bootmem_node_data[0];
     setup_bootmem_allocator();
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9782f42dd31..a4dd793d600 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -23,8 +23,6 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
 struct memnode memnode;
 
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
@@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         nodedata_phys + pgdat_size - 1);
 
     memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-    NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
+    NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
 
     NODE_DATA(nodeid)->node_start_pfn = start_pfn;
     NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index a1d9b79078e..2599c741405 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,6 +38,8 @@ typedef struct bootmem_data {
     struct list_head list;
 } bootmem_data_t;
 
+extern bootmem_data_t bootmem_node_data[];
+
 extern unsigned long bootmem_bootmap_pages(unsigned long);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 extern void free_bootmem(unsigned long addr, unsigned long size);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9f4bbc5da73..35b3cb66703 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -36,6 +36,8 @@ static LIST_HEAD(bdata_list);
 unsigned long saved_max_pfn;
 #endif
 
+bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
+
 /* return the number of _pages_ that will be allocated for the boot bitmap */
 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9ece07ce65b..e089b92cdff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4040,9 +4040,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t contig_bootmem_data;
-struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-
+struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] };
 EXPORT_SYMBOL(contig_page_data);
 #endif
--
cgit v1.2.3-70-g09d2

From e4048e5dc4aecec670f48ed007a28779f09cebd6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Wed, 23 Jul 2008 21:27:01 -0700
Subject: page allocator: inline some __alloc_pages() wrappers

Two zonelist patch series largely rewrote __alloc_pages(). Now it is just a
wrapper function. Inlining these wrappers will save a function call.

[akpm@linux-foundation.org: export __alloc_pages_internal]
Cc: Lee Schermerhorn
Cc: Mel Gorman
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/gfp.h | 21 +++++++++++++++++----
 mm/page_alloc.c     | 19 ++-----------------
 2 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b414be38718..f640ed24142 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -173,11 +173,24 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
-extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *);
+struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, nodemask_t *nodemask);
+
+static inline struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist)
+{
+    return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+static inline struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, nodemask_t *nodemask)
+{
+    return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
 
-extern struct page *
-__alloc_pages_nodemask(gfp_t, unsigned int,
-        struct zonelist *, nodemask_t *nodemask);
 
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
                         unsigned int order)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e089b92cdff..35b1347d81b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1429,7 +1429,7 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-static struct page *
+struct page *
 __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, nodemask_t *nodemask)
 {
@@ -1632,22 +1632,7 @@ nopage:
 got_pg:
     return page;
 }
-
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-        struct zonelist *zonelist)
-{
-    return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
-}
-
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-        struct zonelist *zonelist, nodemask_t *nodemask)
-{
-    return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
-}
-
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_internal);
 
 /*
  * Common helper functions.
--
cgit v1.2.3-70-g09d2

From 3c82d0ce2c4f642b2f24ef98707a030543b06b90 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft
Date: Wed, 23 Jul 2008 21:27:11 -0700
Subject: buddy: clarify comments describing buddy merge

In __free_one_page(), the comment "Move the buddy up one level" appears
attached to the break and by implication when the break is taken we are
moving it up one level:

    if (!page_is_buddy(page, buddy, order))
        break;          /* Move the buddy up one level. */

In reality the inverse is true: we break out when we can no longer merge
this page with its buddy. Looking back into pre-history (into the full git
history) it appears that these two lines accidentally got joined as part of
another change.

Move the comment down where it belongs, below the if, and clarify its
language.

Signed-off-by: Andy Whitcroft
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 35b1347d81b..24aa3d1b9d9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -432,8 +432,9 @@ static inline void __free_one_page(struct page *page,
 
         buddy = __page_find_buddy(page, page_idx, order);
         if (!page_is_buddy(page, buddy, order))
-            break;      /* Move the buddy up one level. */
+            break;
 
+        /* Our buddy is free, merge with it and move up one order. */
         list_del(&buddy->lru);
         zone->free_area[order].nr_free--;
         rmv_page_order(buddy);
--
cgit v1.2.3-70-g09d2
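The merge loop this comment belongs to is easier to follow with the
buddy-finding helper in view (reconstructed from the page_alloc.c of this
era, lightly abridged): a block's buddy at order n is found by flipping bit n
of its page index, so each successful merge walks up exactly one order.

    static inline struct page *
    __page_find_buddy(struct page *page, unsigned long page_idx,
                      unsigned int order)
    {
        /* flip bit 'order' of the index to find the partner block */
        unsigned long buddy_idx = page_idx ^ (1 << order);

        return page + (buddy_idx - page_idx);
    }

For example, at order 0 the page at index 12 buddies with index 13; once
merged, the order-1 block at 12 buddies with the block at 14, and so on until
page_is_buddy() fails and the loop breaks — which is exactly the condition
the relocated comment now describes correctly.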
From 9109fb7b3520de187ebc3646c209d66a233f7169 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Wed, 23 Jul 2008 21:27:20 -0700
Subject: mm: drop unneeded pgdat argument from free_area_init_node()

free_area_init_node() gets passed in the node id as well as the node
descriptor. This is redundant as the function can trivially get the node
descriptor itself by means of NODE_DATA() and the node's id.

I checked all the users and NODE_DATA() seems to be usable everywhere from
where this function is called.

Signed-off-by: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/alpha/mm/numa.c         |  2 +-
 arch/arm/mm/init.c           |  2 +-
 arch/avr32/mm/init.c         |  2 +-
 arch/cris/arch-v10/mm/init.c |  2 +-
 arch/cris/arch-v32/mm/init.c |  2 +-
 arch/m32r/mm/discontig.c     |  3 +--
 arch/m32r/mm/init.c          |  2 +-
 arch/m68k/mm/motorola.c      |  2 +-
 arch/m68k/mm/sun3mmu.c       |  2 +-
 arch/parisc/mm/init.c        |  2 +-
 arch/sparc/mm/srmmu.c        |  3 +--
 arch/sparc/mm/sun4c.c        |  3 +--
 arch/v850/kernel/setup.c     |  3 +--
 include/linux/mm.h           |  5 ++---
 mm/memory_hotplug.c          |  2 +-
 mm/page_alloc.c              | 11 ++++++-----
 16 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index a53fda0481c..def0c74a78a 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -313,7 +313,7 @@ void __init paging_init(void)
         zones_size[ZONE_DMA] = dma_local_pfn;
         zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
     }
-    free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, NULL);
+    free_area_init_node(nid, zones_size, start_pfn, NULL);
 }
 
 /* Initialize the kernel's ZERO_PGE. */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b657f1719af..e6352946dde 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -284,7 +284,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
      */
     arch_adjust_zones(node, zone_size, zhole_size);
 
-    free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
+    free_area_init_node(node, zone_size, start_pfn, zhole_size);
 
     return end_pfn;
 }
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 3f90a87527b..786de88a82a 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -129,7 +129,7 @@ void __init paging_init(void)
         printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
                nid, start_pfn, low);
 
-        free_area_init_node(nid, pgdat, zones_size, start_pfn, NULL);
+        free_area_init_node(nid, zones_size, start_pfn, NULL);
 
         printk("Node %u: mem_map starts at %p\n",
                pgdat->node_id, pgdat->node_mem_map);
diff --git a/arch/cris/arch-v10/mm/init.c b/arch/cris/arch-v10/mm/init.c
index e0fcd1a9bfd..742fd1974c2 100644
--- a/arch/cris/arch-v10/mm/init.c
+++ b/arch/cris/arch-v10/mm/init.c
@@ -182,7 +182,7 @@ paging_init(void)
      * mem_map page array.
      */
 
-    free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+    free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 }
 
 /* Initialize remaps of some I/O-ports. It is important that this
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c
index 5a9ac583464..8a34b8b7429 100644
--- a/arch/cris/arch-v32/mm/init.c
+++ b/arch/cris/arch-v32/mm/init.c
@@ -162,7 +162,7 @@ paging_init(void)
      * substantially higher than 0, like us (we start at PAGE_OFFSET). This
      * saves space in the mem_map page array.
      */
-    free_area_init_node(0, &contig_page_data, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
+    free_area_init_node(0, zones_size, PAGE_OFFSET >> PAGE_SHIFT, 0);
 
     mem_map = contig_page_data.node_mem_map;
 }
diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index aa9145ef6cc..cc23934bc41 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -147,8 +147,7 @@ unsigned long __init zone_sizes_init(void)
         zholes_size[ZONE_DMA] = mp->holes;
         holes += zholes_size[ZONE_DMA];
 
-        free_area_init_node(nid, NODE_DATA(nid), zones_size,
-            start_pfn, zholes_size);
+        free_area_init_node(nid, zones_size, start_pfn, zholes_size);
     }
 
 /*
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index bbd97c85bc5..28799af15e9 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -123,7 +123,7 @@ unsigned long __init zone_sizes_init(void)
     start_pfn = __MEMORY_START >> PAGE_SHIFT;
 #endif /* CONFIG_MMU */
 
-    free_area_init_node(0, NODE_DATA(0), zones_size, start_pfn, 0);
+    free_area_init_node(0, zones_size, start_pfn, 0);
 
     return 0;
 }
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 226795bdf35..c5dbb9bdb32 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -296,7 +296,7 @@ void __init paging_init(void)
 #endif
     for (i = 0; i < m68k_num_memory; i++) {
         zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
-        free_area_init_node(i, pg_data_map + i, zones_size,
+        free_area_init_node(i, zones_size,
                     m68k_memory[i].addr >> PAGE_SHIFT, NULL);
     }
 }
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index edceefc1887..1b902dbd437 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -94,7 +94,7 @@ void __init paging_init(void)
     /* I really wish I knew why the following change made things better...
        -- Sam */
 /* free_area_init(zones_size); */
-    free_area_init_node(0, NODE_DATA(0), zones_size,
+    free_area_init_node(0, zones_size,
             (__pa(PAGE_OFFSET) >> PAGE_SHIFT) + 1, NULL);
 
 }
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 0ddf4904640..7c155c254e7 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -887,7 +887,7 @@ void __init paging_init(void)
         }
 #endif
 
-        free_area_init_node(i, NODE_DATA(i), zones_size,
+        free_area_init_node(i, zones_size,
                 pmem_ranges[i].start_pfn, NULL);
     }
 }
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c624e04ff03..ee30462598f 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1352,8 +1352,7 @@ void __init srmmu_paging_init(void)
         zones_size[ZONE_HIGHMEM] = npages;
         zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-        free_area_init_node(0, &contig_page_data, zones_size,
-                    pfn_base, zholes_size);
+        free_area_init_node(0, zones_size, pfn_base, zholes_size);
     }
 }
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 2375fe9dc31..d1782f6368b 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -2123,8 +2123,7 @@ void __init sun4c_paging_init(void)
         zones_size[ZONE_HIGHMEM] = npages;
         zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
 
-        free_area_init_node(0, &contig_page_data, zones_size,
-                    pfn_base, zholes_size);
+        free_area_init_node(0, zones_size, pfn_base, zholes_size);
     }
 
     cnt = 0;
diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
index a0a8456a843..10335cecf7b 100644
--- a/arch/v850/kernel/setup.c
+++ b/arch/v850/kernel/setup.c
@@ -295,8 +295,7 @@ init_mem_alloc (unsigned long ram_start, unsigned long ram_len)
 #error MAX_ORDER is too large for given PAGE_OFFSET (use CONFIG_FORCE_MAX_ZONEORDER to change it)
 #endif
     NODE_DATA(0)->node_mem_map = NULL;
-    free_area_init_node (0, NODE_DATA(0), zones_size,
-                 ADDR_TO_PAGE (PAGE_OFFSET), 0);
+    free_area_init_node(0, zones_size, ADDR_TO_PAGE (PAGE_OFFSET), 0);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8071097302..196924b657b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -962,9 +962,8 @@ static inline void pgtable_page_dtor(struct page *page)
         NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, pg_data_t *pgdat,
-    unsigned long * zones_size, unsigned long zone_start_pfn,
-    unsigned long *zholes_size);
+extern void free_area_init_node(int nid, unsigned long * zones_size,
+        unsigned long zone_start_pfn, unsigned long *zholes_size);
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 /*
  * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 833f854eabe..6e26adc08f1 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -455,7 +455,7 @@ static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
     /* we can use NODE_DATA(nid) from here */
 
     /* init node's zones as empty zones, we don't have any present pages.*/
-    free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+    free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 
     return pgdat;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 24aa3d1b9d9..e43aae135b3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3461,10 +3461,11 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 #endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
 
-void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
-        unsigned long *zones_size, unsigned long node_start_pfn,
-        unsigned long *zholes_size)
+void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
+        unsigned long node_start_pfn, unsigned long *zholes_size)
 {
+    pg_data_t *pgdat = NODE_DATA(nid);
+
     pgdat->node_id = nid;
     pgdat->node_start_pfn = node_start_pfn;
     calculate_node_totalpages(pgdat, zones_size, zholes_size);
@@ -3961,7 +3962,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
     setup_nr_node_ids();
     for_each_online_node(nid) {
         pg_data_t *pgdat = NODE_DATA(nid);
-        free_area_init_node(nid, pgdat, NULL,
+        free_area_init_node(nid, NULL,
                 find_min_pfn_for_node(nid), NULL);
 
         /* Any memory on that node */
@@ -4032,7 +4033,7 @@ EXPORT_SYMBOL(contig_page_data);
 
 void __init free_area_init(unsigned long *zones_size)
 {
-    free_area_init_node(0, NODE_DATA(0), zones_size,
+    free_area_init_node(0, zones_size,
             __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
 }
--
cgit v1.2.3-70-g09d2

From 01ad1c0827db5b3695c53e296dbb2c1da16a0911 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Wed, 23 Jul 2008 21:27:46 -0700
Subject: mm: export prep_compound_page to mm

hugetlb will need to get compound pages from bootmem to handle the case of
them being greater than or equal to MAX_ORDER. Export the constructor
function needed for this.

Acked-by: Adam Litke
Signed-off-by: Andi Kleen
Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/internal.h   | 2 ++
 mm/page_alloc.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/internal.h b/mm/internal.h
index 858ad01864d..1f43f741697 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -16,6 +16,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
         unsigned long floor, unsigned long ceiling);
 
+extern void prep_compound_page(struct page *page, unsigned long order);
+
 static inline void set_page_count(struct page *page, int v)
 {
     atomic_set(&page->_count, v);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e43aae135b3..eaa86671ebb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -264,7 +264,7 @@ static void free_compound_page(struct page *page)
     __free_pages_ok(page, compound_order(page));
 }
 
-static void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned long order)
 {
     int i;
     int nr_pages = 1 << order;
--
cgit v1.2.3-70-g09d2
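For reference, the newly exported constructor does roughly the following
(paraphrased from the page_alloc.c of this period; details such as exactly
where the order is stored varied between releases, so treat this as a
sketch):

    void prep_compound_page(struct page *page, unsigned long order)
    {
        int i;
        int nr_pages = 1 << order;

        set_compound_page_dtor(page, free_compound_page);
        set_compound_order(page, order);
        __SetPageHead(page);              /* first page is the head */
        for (i = 1; i < nr_pages; i++) {
            struct page *p = page + i;

            __SetPageTail(p);             /* the rest are tails ... */
            p->first_page = page;         /* ... pointing at the head */
        }
    }

With this exported, hugetlb can construct compound pages from bootmem ranges
at or above MAX_ORDER — sizes the buddy allocator itself could never have
handed out.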
From 2be0ffe2b29bd31d3debd0877797892ff2d91f4c Mon Sep 17 00:00:00 2001
From: Timur Tabi
Date: Wed, 23 Jul 2008 21:28:11 -0700
Subject: mm: add alloc_pages_exact() and free_pages_exact()

alloc_pages_exact() is similar to alloc_pages(), except that it allocates
the minimum number of pages to fulfill the request. This is useful if you
want to allocate a very large buffer that is slightly larger than an even
power-of-two number of pages. In that case, alloc_pages() will waste a lot
of memory.

I have a video driver that wants to allocate a 5MB buffer. alloc_pages()
will waste 3MB of physically-contiguous memory.

Signed-off-by: Timur Tabi
Cc: Andi Kleen
Acked-by: Mel Gorman
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/gfp.h |  3 +++
 mm/page_alloc.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f640ed24142..e8003afeffb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -228,6 +228,9 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask,
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
+void free_pages_exact(void *virt, size_t size);
+
 #define __get_free_page(gfp_mask) \
         __get_free_pages((gfp_mask),0)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa86671ebb..8d528d57b40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1697,6 +1697,59 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/**
+ * alloc_pages_exact - allocate an exact number physically-contiguous pages.
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * This function is similar to alloc_pages(), except that it allocates the
+ * minimum number of pages to satisfy the request.  alloc_pages() can only
+ * allocate memory in power-of-two pages.
+ *
+ * This function is also limited by MAX_ORDER.
+ *
+ * Memory allocated by this function must be released by free_pages_exact().
+ */
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+{
+    unsigned int order = get_order(size);
+    unsigned long addr;
+
+    addr = __get_free_pages(gfp_mask, order);
+    if (addr) {
+        unsigned long alloc_end = addr + (PAGE_SIZE << order);
+        unsigned long used = addr + PAGE_ALIGN(size);
+
+        split_page(virt_to_page(addr), order);
+        while (used < alloc_end) {
+            free_page(used);
+            used += PAGE_SIZE;
+        }
+    }
+
+    return (void *)addr;
+}
+EXPORT_SYMBOL(alloc_pages_exact);
+
+/**
+ * free_pages_exact - release memory allocated via alloc_pages_exact()
+ * @virt: the value returned by alloc_pages_exact.
+ * @size: size of allocation, same value as passed to alloc_pages_exact().
+ *
+ * Release the memory allocated by a previous call to alloc_pages_exact.
+ */
+void free_pages_exact(void *virt, size_t size)
+{
+    unsigned long addr = (unsigned long)virt;
+    unsigned long end = addr + PAGE_ALIGN(size);
+
+    while (addr < end) {
+        free_page(addr);
+        addr += PAGE_SIZE;
+    }
+}
+EXPORT_SYMBOL(free_pages_exact);
+
 static unsigned int nr_free_zone_pages(int offset)
 {
     struct zoneref *z;
--
cgit v1.2.3-70-g09d2
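Taking the changelog's video-buffer motivation, a caller would use the pair
like this (sketch with hypothetical driver names; assumes 4KiB pages, where
alloc_pages() would have to round 5MB up to the next power of two, 8MB):

    #include <linux/gfp.h>

    #define BUF_SIZE (5 * 1024 * 1024)    /* the changelog's 5MB example */

    static void *video_buf;

    static int video_alloc_buffer(void)
    {
        /* the 3MB tail of the rounded-up allocation is returned at once */
        video_buf = alloc_pages_exact(BUF_SIZE, GFP_KERNEL);
        if (!video_buf)
            return -ENOMEM;
        return 0;
    }

    static void video_free_buffer(void)
    {
        free_pages_exact(video_buf, BUF_SIZE);
    }

Internally, alloc_pages_exact() still grabs a power-of-two block, but
split_page() breaks it into order-0 pages so the unused tail can be freed
page by page — which is exactly where the 3MB saving comes from.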
From b69a7288ea7bf171328f313f0edae629f50e3bdb Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Wed, 23 Jul 2008 21:28:12 -0700
Subject: mm/page_alloc.c: cleanups

This patch contains the following cleanups:
- make the following needlessly global variables static:
  - required_kernelcore
  - zone_movable_pfn[]
- make the following needlessly global functions static:
  - move_freepages()
  - move_freepages_block()
  - setup_pageset()
  - find_usable_zone_for_movable()
  - adjust_zone_range_for_zone_movable()
  - __absent_pages_in_range()
  - find_min_pfn_for_node()
  - find_zone_movable_pfns_for_nodes()

Signed-off-by: Adrian Bunk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 25 +++++++++++++-------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8d528d57b40..cd4c41432ef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -153,9 +153,9 @@ static unsigned long __meminitdata dma_reserve;
 static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
 static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
 #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
-unsigned long __initdata required_kernelcore;
+static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
-unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -674,9 +674,9 @@ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-int move_freepages(struct zone *zone,
-            struct page *start_page, struct page *end_page,
-            int migratetype)
+static int move_freepages(struct zone *zone,
+            struct page *start_page, struct page *end_page,
+            int migratetype)
 {
     struct page *page;
     unsigned long order;
@@ -715,7 +715,8 @@ int move_freepages(struct zone *zone,
     return pages_moved;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
+static int move_freepages_block(struct zone *zone, struct page *page,
+                int migratetype)
 {
     unsigned long start_pfn, end_pfn;
     struct page *start_page, *end_page;
@@ -2652,7 +2653,7 @@ static int zone_batchsize(struct zone *zone)
     return batch;
 }
 
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
     struct per_cpu_pages *pcp;
 
@@ -3099,7 +3100,7 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
  * assumption is made that zones within a node are ordered in monotonic
  * increasing memory addresses so that the "highest" populated zone is used
  */
-void __init find_usable_zone_for_movable(void)
+static void __init find_usable_zone_for_movable(void)
 {
     int zone_index;
     for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
@@ -3125,7 +3126,7 @@ void __init find_usable_zone_for_movable(void)
 * highest usable zone for ZONE_MOVABLE. This preserves the assumption that
 * zones within a node are in order of monotonic increases memory addresses
 */
-void __meminit adjust_zone_range_for_zone_movable(int nid,
+static void __meminit adjust_zone_range_for_zone_movable(int nid,
                     unsigned long zone_type,
                     unsigned long node_start_pfn,
                     unsigned long node_end_pfn,
@@ -3186,7 +3187,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
 * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
 * then all holes in the requested range will be accounted for.
 */
-unsigned long __meminit __absent_pages_in_range(int nid,
+static unsigned long __meminit __absent_pages_in_range(int nid,
                 unsigned long range_start_pfn,
                 unsigned long range_end_pfn)
 {
@@ -3723,7 +3724,7 @@ static void __init sort_node_map(void)
 }
 
 /* Find the lowest pfn for a node */
-unsigned long __init find_min_pfn_for_node(int nid)
+static unsigned long __init find_min_pfn_for_node(int nid)
 {
     int i;
     unsigned long min_pfn = ULONG_MAX;
@@ -3795,7 +3796,7 @@ static unsigned long __init early_calculate_totalpages(void)
 * memory. When they don't, some nodes will have more kernelcore than
 * others
 */
-void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
 {
     int i, nid;
     unsigned long usable_startpfn;
--
cgit v1.2.3-70-g09d2

From af370fb8cb3031f20438f246798d5f0d98089f29 Mon Sep 17 00:00:00 2001
From: Yasunori Goto
Date: Wed, 23 Jul 2008 21:28:17 -0700
Subject: memory hotplug: small fixes to bootmem freeing for memory hotremove

- Change some naming
  * Magic -> types
  * MIX_INFO -> MIX_SECTION_INFO
  * Change definition of bootmem type from direct hex value

- __free_pages_bootmem() becomes __meminit.

Signed-off-by: Yasunori Goto
Cc: Andy Whitcroft
Cc: Badari Pulavarty
Cc: Yinghai Lu
Cc: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory_hotplug.h |  8 ++++----
 mm/memory_hotplug.c            | 12 ++++++------
 mm/page_alloc.c                |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ea9f5ad9ec8..3628e5088f6 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,12 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Magic number for free bootmem.
+ * Types for free bootmem.
  * The normal smallest mapcount is -1. Here is smaller value than it.
 */
-#define SECTION_INFO       0xfffffffe
-#define MIX_INFO           0xfffffffd
-#define NODE_INFO          0xfffffffc
+#define SECTION_INFO       (-1 - 1)
+#define MIX_SECTION_INFO   (-1 - 2)
+#define NODE_INFO          (-1 - 3)
 
 /*
  * pgdat resizing functions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ec85c37dcfb..0fb05b258f0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -62,9 +62,9 @@ static void release_memory_resource(struct resource *res)
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info, struct page *page, int magic)
+static void get_page_bootmem(unsigned long info, struct page *page, int type)
 {
-    atomic_set(&page->_mapcount, magic);
+    atomic_set(&page->_mapcount, type);
     SetPagePrivate(page);
     set_page_private(page, info);
     atomic_inc(&page->_count);
@@ -72,10 +72,10 @@ static void get_page_bootmem(unsigned long info, struct page *page, int magic)
 
 void put_page_bootmem(struct page *page)
 {
-    int magic;
+    int type;
 
-    magic = atomic_read(&page->_mapcount);
-    BUG_ON(magic >= -1);
+    type = atomic_read(&page->_mapcount);
+    BUG_ON(type >= -1);
 
     if (atomic_dec_return(&page->_count) == 1) {
         ClearPagePrivate(page);
@@ -119,7 +119,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
     mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
 
     for (i = 0; i < mapsize; i++, page++)
-        get_page_bootmem(section_nr, page, MIX_INFO);
+        get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd4c41432ef..6da667274df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -533,7 +533,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 /*
  * permit the bootmem allocator to evade page validation on high-order frees
  */
-void __free_pages_bootmem(struct page *page, unsigned int order)
+void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
     if (order == 0) {
         __ClearPageReserved(page);
--
cgit v1.2.3-70-g09d2
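The new definitions are not just cosmetic. Written relative to -1, they
document that the bootmem types sit just below the smallest legitimate
mapcount — which is what the BUG_ON(type >= -1) check relies on — while
producing exactly the old bit patterns when stored in the 32-bit _mapcount
field. A quick standalone two's-complement check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        assert((uint32_t)(-1 - 1) == 0xfffffffe);   /* SECTION_INFO */
        assert((uint32_t)(-1 - 2) == 0xfffffffd);   /* MIX_SECTION_INFO */
        assert((uint32_t)(-1 - 3) == 0xfffffffc);   /* NODE_INFO */
        return 0;
    }

So the rename from "magic numbers" to "types" costs nothing in binary
compatibility with pages already tagged by earlier kernels' bootmem code.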
From 9b1a4d38373a5581a4e01032a3ccdd94cd93477b Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Mon, 28 Jul 2008 12:16:30 -0500
Subject: stop_machine: Wean existing callers off stop_machine_run()

Signed-off-by: Rusty Russell
---
 arch/s390/kernel/kprobes.c         | 6 +++---
 drivers/char/hw_random/intel-rng.c | 6 +++---
 kernel/module.c                    | 8 ++++----
 kernel/rcuclassic.c                | 4 ++--
 mm/page_alloc.c                    | 4 ++--
 5 files changed, 14 insertions(+), 14 deletions(-)
(limited to 'mm/page_alloc.c')

diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 4f82e5b5f87..569079ec4ff 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -197,7 +197,7 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
 	args.new = BREAKPOINT_INSTRUCTION;

 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	stop_machine(swap_instruction, &args, NULL);
 	kcb->kprobe_status = status;
 }
@@ -212,7 +212,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 	args.new = p->opcode;

 	kcb->kprobe_status = KPROBE_SWAP_INST;
-	stop_machine_run(swap_instruction, &args, NR_CPUS);
+	stop_machine(swap_instruction, &args, NULL);
 	kcb->kprobe_status = status;
 }
@@ -331,7 +331,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	 * No kprobe at this address. The fault has not been
 	 * caused by a kprobe breakpoint. The race of breakpoint
 	 * vs. kprobe remove does not exist because on s390 we
-	 * use stop_machine_run to arm/disarm the breakpoints.
+	 * use stop_machine to arm/disarm the breakpoints.
 	 */
 	goto no_kprobe;
diff --git a/drivers/char/hw_random/intel-rng.c b/drivers/char/hw_random/intel-rng.c
index 27fdc086649..8a2fce0756e 100644
--- a/drivers/char/hw_random/intel-rng.c
+++ b/drivers/char/hw_random/intel-rng.c
@@ -241,7 +241,7 @@ static int __init intel_rng_hw_init(void *_intel_rng_hw)
 	struct intel_rng_hw *intel_rng_hw = _intel_rng_hw;
 	u8 mfc, dvc;

-	/* interrupts disabled in stop_machine_run call */
+	/* interrupts disabled in stop_machine call */

 	if (!(intel_rng_hw->fwh_dec_en1_val & FWH_F8_EN_MASK))
 		pci_write_config_byte(intel_rng_hw->dev,
@@ -365,10 +365,10 @@ static int __init mod_init(void)
 	 * location with the Read ID command, all activity on the system
 	 * must be stopped until the state is back to normal.
 	 *
-	 * Use stop_machine_run because IPIs can be blocked by disabling
+	 * Use stop_machine because IPIs can be blocked by disabling
 	 * interrupts.
 	 */
-	err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NR_CPUS);
+	err = stop_machine(intel_rng_hw_init, intel_rng_hw, NULL);
 	pci_dev_put(dev);
 	iounmap(intel_rng_hw->mem);
 	kfree(intel_rng_hw);
diff --git a/kernel/module.c b/kernel/module.c
index d861bd5b8c1..61d212120df 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -678,7 +678,7 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 	if (flags & O_NONBLOCK) {
 		struct stopref sref = { mod, flags, forced };

-		return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+		return stop_machine(__try_stop_module, &sref, NULL);
 	} else {
 		/* We don't need to stop the machine for this. */
 		mod->state = MODULE_STATE_GOING;
@@ -1416,7 +1416,7 @@ static int __unlink_module(void *_mod)
 static void free_module(struct module *mod)
 {
 	/* Delete from various lists */
-	stop_machine_run(__unlink_module, mod, NR_CPUS);
+	stop_machine(__unlink_module, mod, NULL);
 	remove_notes_attrs(mod);
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
@@ -2197,7 +2197,7 @@ static struct module *load_module(void __user *umod,
 	/* Now sew it into the lists so we can get lockdep and oops
 	 * info during argument parsing. Noone should access us, since
 	 * strong_try_module_get() will fail. */
-	stop_machine_run(__link_module, mod, NR_CPUS);
+	stop_machine(__link_module, mod, NULL);

 	/* Size of section 0 is 0, so this works well if no params */
 	err = parse_args(mod->name, mod->args,
@@ -2231,7 +2231,7 @@ static struct module *load_module(void __user *umod,
 	return mod;

 unlink:
-	stop_machine_run(__unlink_module, mod, NR_CPUS);
+	stop_machine(__unlink_module, mod, NULL);
 	module_arch_cleanup(mod);
 cleanup:
 	kobject_del(&mod->mkobj.kobj);
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 6f8696c502f..aad93cdc9f6 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -91,8 +91,8 @@ static void force_quiescent_state(struct rcu_data *rdp,
 	 * rdp->cpu is the current cpu.
 	 *
 	 * cpu_online_map is updated by the _cpu_down()
-	 * using stop_machine_run(). Since we're in irqs disabled
-	 * section, stop_machine_run() is not exectuting, hence
+	 * using __stop_machine(). Since we're in irqs disabled
+	 * section, __stop_machine() is not exectuting, hence
 	 * the cpu_online_map is stable.
	 *
 	 * However, a cpu might have been offlined _just_ before
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6da667274df..3cf3d05b6bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2372,7 +2372,7 @@ static void build_zonelist_cache(pg_data_t *pgdat)

 #endif	/* CONFIG_NUMA */

-/* return values int ....just for stop_machine_run() */
+/* return values int ....just for stop_machine() */
 static int __build_all_zonelists(void *dummy)
 {
 	int nid;
@@ -2397,7 +2397,7 @@ void build_all_zonelists(void)
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
+		stop_machine(__build_all_zonelists, NULL, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
-- cgit v1.2.3-70-g09d2
From 1d1958f05095a7e9ecbba86235122784a3d1b561 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Tue, 29 Jul 2008 22:33:16 -0700
Subject: mm: remove find_max_pfn_with_active_regions

It has no user now. Also print out info about adding/removing active
regions.

Signed-off-by: Yinghai Lu
Acked-by: Mel Gorman
Acked-by: Ingo Molnar
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mm.h |  1 -
 mm/page_alloc.c    | 17 -----------------
 2 files changed, 18 deletions(-)
(limited to 'mm/page_alloc.c')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 866a3dbe5c7..5e2c8af4999 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1041,7 +1041,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 extern void get_pfn_range_for_nid(unsigned int nid,
 			unsigned long *start_pfn, unsigned long *end_pfn);
 extern unsigned long find_min_pfn_with_active_regions(void);
-extern unsigned long find_max_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3cf3d05b6bd..401d104d2bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3753,23 +3753,6 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 	return find_min_pfn_for_node(MAX_NUMNODES);
 }

-/**
- * find_max_pfn_with_active_regions - Find the maximum PFN registered
- *
- * It returns the maximum PFN based on information provided via
- * add_active_range().
- */
-unsigned long __init find_max_pfn_with_active_regions(void)
-{
-	int i;
-	unsigned long max_pfn = 0;
-
-	for (i = 0; i < nr_nodemap_entries; i++)
-		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
-
-	return max_pfn;
-}
-
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
-- cgit v1.2.3-70-g09d2
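Should the maximum registered PFN be needed again, the removed helper's loop (quoted in the hunk above) is trivial to open-code. Note that early_node_map[] and nr_nodemap_entries are static to mm/page_alloc.c, so this sketch only works inside that file:

	unsigned long max_pfn = 0;
	int i;

	/* highest end_pfn among all regions seen by add_active_range() */
	for (i = 0; i < nr_nodemap_entries; i++)
		max_pfn = max(max_pfn, early_node_map[i].end_pfn);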
From 74768ed833344bb0f82b97cee46320a3d7f09ecd Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Tue, 12 Aug 2008 15:08:39 -0700
Subject: page allocator: use no-panic variant of alloc_bootmem() in alloc_large_system_hash()

..since a failed allocation is initially handled gracefully; the function
retries with smaller sizes and panic()s explicitly only after those
retries have failed.

Signed-off-by: Jan Beulich
Signed-off-by: David Howells
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/bootmem.h | 4 ++++
 mm/page_alloc.c         | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)
(limited to 'mm/page_alloc.c')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 652470b687c..95837bfb525 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -97,10 +97,14 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_nopanic(x) \
+	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
 	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_pages_nopanic(x) \
+	__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #define alloc_bootmem_node(pgdat, x) \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 401d104d2bb..af982f7cdb2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4437,7 +4437,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	do {
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY)
-			table = alloc_bootmem(size);
+			table = alloc_bootmem_nopanic(size);
 		else if (hashdist)
 			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
 		else {
-- cgit v1.2.3-70-g09d2
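The _nopanic variants return NULL on failure instead of calling panic(), leaving the failure policy to the caller. The retry-then-panic shape the commit message describes looks roughly like this -- paraphrased from alloc_large_system_hash(), not a verbatim quote:

	do {
		size = bucketsize << log2qty;
		if (flags & HASH_EARLY)
			table = alloc_bootmem_nopanic(size);	/* may be NULL */
		else
			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
	} while (!table && size > PAGE_SIZE && --log2qty);

	if (!table)
		panic("Failed to allocate %s hash table\n", tablename);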