path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  |  244
1 file changed, 178 insertions, 66 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f32fae3121f..6da667274df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -153,9 +153,9 @@ static unsigned long __meminitdata dma_reserve;
static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
- unsigned long __initdata required_kernelcore;
+ static unsigned long __initdata required_kernelcore;
static unsigned long __initdata required_movablecore;
- unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+ static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
@@ -264,7 +264,7 @@ static void free_compound_page(struct page *page)
__free_pages_ok(page, compound_order(page));
}
-static void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
@@ -432,8 +432,9 @@ static inline void __free_one_page(struct page *page,
buddy = __page_find_buddy(page, page_idx, order);
if (!page_is_buddy(page, buddy, order))
- break; /* Move the buddy up one level. */
+ break;
+ /* Our buddy is free, merge with it and move up one order. */
list_del(&buddy->lru);
zone->free_area[order].nr_free--;
rmv_page_order(buddy);
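
An aside on the merge step above (illustrative, not part of the patch): __page_find_buddy() locates the buddy by flipping bit 'order' of the page index, so the loop merges and moves up one order each time a free buddy is found. A minimal sketch of that index calculation, assuming the usual XOR scheme:

	/*
	 * Sketch of the buddy-index calculation done inside
	 * __page_find_buddy(): the buddy of a block at 'order'
	 * differs from it only in bit 'order'.
	 */
	static inline unsigned long buddy_index(unsigned long page_idx,
						unsigned int order)
	{
		return page_idx ^ (1UL << order);
	}
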
@@ -532,7 +533,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
/*
* permit the bootmem allocator to evade page validation on high-order frees
*/
-void __free_pages_bootmem(struct page *page, unsigned int order)
+void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
if (order == 0) {
__ClearPageReserved(page);
@@ -673,9 +674,9 @@ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
-int move_freepages(struct zone *zone,
- struct page *start_page, struct page *end_page,
- int migratetype)
+static int move_freepages(struct zone *zone,
+ struct page *start_page, struct page *end_page,
+ int migratetype)
{
struct page *page;
unsigned long order;
@@ -714,7 +715,8 @@ int move_freepages(struct zone *zone,
return pages_moved;
}
-int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
+static int move_freepages_block(struct zone *zone, struct page *page,
+ int migratetype)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
@@ -918,7 +920,7 @@ void drain_local_pages(void *arg)
*/
void drain_all_pages(void)
{
- on_each_cpu(drain_local_pages, NULL, 0, 1);
+ on_each_cpu(drain_local_pages, NULL, 1);
}
#ifdef CONFIG_HIBERNATION
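
The drain_all_pages() hunk tracks an on_each_cpu() interface change rather than a behavioural one: the call drops the old 'retry' argument. For reference, the three-argument form assumed by this caller (an inference from this hunk; see include/linux/smp.h in the same tree):

	int on_each_cpu(void (*func)(void *info), void *info, int wait);
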
@@ -1429,7 +1431,7 @@ try_next_zone:
/*
* This is the 'heart' of the zoned buddy allocator.
*/
-static struct page *
+struct page *
__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
@@ -1632,22 +1634,7 @@ nopage:
got_pg:
return page;
}
-
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist)
-{
- return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
-}
-
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist, nodemask_t *nodemask)
-{
- return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
-}
-
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_internal);
/*
* Common helper functions.
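
With __alloc_pages_internal() made the single exported entry point above, the removed __alloc_pages() and __alloc_pages_nodemask() wrappers can become trivial static inlines in a header, presumably include/linux/gfp.h. A sketch of what those wrappers would look like (not shown in this diff):

	static inline struct page *
	__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist)
	{
		return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
	}

	static inline struct page *
	__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
	{
		return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
	}
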
@@ -1711,6 +1698,59 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
+/**
+ * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * This function is similar to alloc_pages(), except that it allocates the
+ * minimum number of pages to satisfy the request. alloc_pages() can only
+ * allocate memory in power-of-two pages.
+ *
+ * This function is also limited by MAX_ORDER.
+ *
+ * Memory allocated by this function must be released by free_pages_exact().
+ */
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+{
+ unsigned int order = get_order(size);
+ unsigned long addr;
+
+ addr = __get_free_pages(gfp_mask, order);
+ if (addr) {
+ unsigned long alloc_end = addr + (PAGE_SIZE << order);
+ unsigned long used = addr + PAGE_ALIGN(size);
+
+ split_page(virt_to_page(addr), order);
+ while (used < alloc_end) {
+ free_page(used);
+ used += PAGE_SIZE;
+ }
+ }
+
+ return (void *)addr;
+}
+EXPORT_SYMBOL(alloc_pages_exact);
+
+/**
+ * free_pages_exact - release memory allocated via alloc_pages_exact()
+ * @virt: the value returned by alloc_pages_exact.
+ * @size: size of allocation, same value as passed to alloc_pages_exact().
+ *
+ * Release the memory allocated by a previous call to alloc_pages_exact.
+ */
+void free_pages_exact(void *virt, size_t size)
+{
+ unsigned long addr = (unsigned long)virt;
+ unsigned long end = addr + PAGE_ALIGN(size);
+
+ while (addr < end) {
+ free_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
+EXPORT_SYMBOL(free_pages_exact);
+
static unsigned int nr_free_zone_pages(int offset)
{
struct zoneref *z;
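
A usage sketch for the new pair of helpers added above (hypothetical caller, not part of the patch): allocate an exact-sized, physically contiguous buffer and release it with the matching size.

	#include <linux/errno.h>
	#include <linux/gfp.h>

	#define EXAMPLE_BUF_SIZE	(48 * 1024)	/* 12 pages with 4K pages */

	static void *example_buf;

	static int example_setup(void)
	{
		/* rounds up to order 4 internally, then frees the unused tail pages */
		example_buf = alloc_pages_exact(EXAMPLE_BUF_SIZE, GFP_KERNEL);
		if (!example_buf)
			return -ENOMEM;
		return 0;
	}

	static void example_teardown(void)
	{
		/* size must match what was passed to alloc_pages_exact() */
		free_pages_exact(example_buf, EXAMPLE_BUF_SIZE);
	}
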
@@ -2352,6 +2392,7 @@ void build_all_zonelists(void)
if (system_state == SYSTEM_BOOTING) {
__build_all_zonelists(NULL);
+ mminit_verify_zonelist();
cpuset_init_current_mems_allowed();
} else {
/* we have to stop all cpus to guarantee there is no user
@@ -2534,6 +2575,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
}
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
+ mminit_verify_page_links(page, zone, nid, pfn);
init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
@@ -2611,7 +2653,7 @@ static int zone_batchsize(struct zone *zone)
return batch;
}
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
{
struct per_cpu_pages *pcp;
@@ -2836,6 +2878,12 @@ __meminit int init_currently_empty_zone(struct zone *zone,
zone->zone_start_pfn = zone_start_pfn;
+ mminit_dprintk(MMINIT_TRACE, "memmap_init",
+ "Initialising map node %d zone %lu pfns %lu -> %lu\n",
+ pgdat->node_id,
+ (unsigned long)zone_idx(zone),
+ zone_start_pfn, (zone_start_pfn + size));
+
zone_init_free_lists(zone);
return 0;
@@ -2929,6 +2977,18 @@ void __init free_bootmem_with_active_regions(int nid,
}
}
+void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
+{
+ int i;
+ int ret;
+
+ for_each_active_range_index_in_nid(i, nid) {
+ ret = work_fn(early_node_map[i].start_pfn,
+ early_node_map[i].end_pfn, data);
+ if (ret)
+ break;
+ }
+}
/**
* sparse_memory_present_with_active_regions - Call memory_present for each active range
* @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
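
The new work_with_active_regions() above walks each registered range on a node and hands it to a callback until the callback returns non-zero. A sketch of a caller, assuming work_fn_t has the usual int (*)(unsigned long start_pfn, unsigned long end_pfn, void *data) shape (the callback and wrapper below are hypothetical):

	static int __init count_active_pages(unsigned long start_pfn,
					     unsigned long end_pfn, void *data)
	{
		unsigned long *total = data;

		*total += end_pfn - start_pfn;
		return 0;	/* returning non-zero would stop the walk early */
	}

	static unsigned long __init active_pages_on_node(int nid)
	{
		unsigned long total = 0;

		work_with_active_regions(nid, count_active_pages, &total);
		return total;
	}
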
@@ -2963,7 +3023,8 @@ void __init sparse_memory_present_with_active_regions(int nid)
void __init push_node_boundaries(unsigned int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
- printk(KERN_DEBUG "Entering push_node_boundaries(%u, %lu, %lu)\n",
+ mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+ "Entering push_node_boundaries(%u, %lu, %lu)\n",
nid, start_pfn, end_pfn);
/* Initialise the boundary for this node if necessary */
@@ -2981,7 +3042,8 @@ void __init push_node_boundaries(unsigned int nid,
static void __meminit account_node_boundary(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn)
{
- printk(KERN_DEBUG "Entering account_node_boundary(%u, %lu, %lu)\n",
+ mminit_dprintk(MMINIT_TRACE, "zoneboundary",
+ "Entering account_node_boundary(%u, %lu, %lu)\n",
nid, *start_pfn, *end_pfn);
/* Return if boundary information has not been provided */
@@ -3038,7 +3100,7 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
* assumption is made that zones within a node are ordered in monotonic
* increasing memory addresses so that the "highest" populated zone is used
*/
-void __init find_usable_zone_for_movable(void)
+static void __init find_usable_zone_for_movable(void)
{
int zone_index;
for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
@@ -3064,7 +3126,7 @@ void __init find_usable_zone_for_movable(void)
* highest usable zone for ZONE_MOVABLE. This preserves the assumption that
* zones within a node are in order of monotonic increases memory addresses
*/
-void __meminit adjust_zone_range_for_zone_movable(int nid,
+static void __meminit adjust_zone_range_for_zone_movable(int nid,
unsigned long zone_type,
unsigned long node_start_pfn,
unsigned long node_end_pfn,
@@ -3125,7 +3187,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
*/
-unsigned long __meminit __absent_pages_in_range(int nid,
+static unsigned long __meminit __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
@@ -3356,8 +3418,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
if (realsize >= memmap_pages) {
realsize -= memmap_pages;
- printk(KERN_DEBUG
- " %s zone: %lu pages used for memmap\n",
+ mminit_dprintk(MMINIT_TRACE, "memmap_init",
+ "%s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
@@ -3367,7 +3429,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
/* Account for reserved pages */
if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
- printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
+ mminit_dprintk(MMINIT_TRACE, "memmap_init",
+ "%s zone: %lu pages reserved\n",
zone_names[0], dma_reserve);
}
@@ -3452,15 +3515,21 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
-void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long node_start_pfn,
- unsigned long *zholes_size)
+void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
+ unsigned long node_start_pfn, unsigned long *zholes_size)
{
+ pg_data_t *pgdat = NODE_DATA(nid);
+
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+ printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+ nid, (unsigned long)pgdat,
+ (unsigned long)pgdat->node_mem_map);
+#endif
free_area_init_core(pgdat, zones_size, zholes_size);
}
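
free_area_init_node() now resolves its pg_data_t from the node id via NODE_DATA(nid) instead of taking it as a parameter, so callers shrink by one argument. A sketch of the updated call from arch setup code (hypothetical function and values):

	/* pgdat is looked up internally via NODE_DATA(nid) */
	static void __init example_node_setup(int nid, unsigned long start_pfn,
					      unsigned long *zones_size)
	{
		free_area_init_node(nid, zones_size, start_pfn, NULL);
	}
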
@@ -3503,10 +3572,13 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
{
int i;
- printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
- "%d entries of %d used\n",
- nid, start_pfn, end_pfn,
- nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+ mminit_dprintk(MMINIT_TRACE, "memory_register",
+ "Entering add_active_range(%d, %#lx, %#lx) "
+ "%d entries of %d used\n",
+ nid, start_pfn, end_pfn,
+ nr_nodemap_entries, MAX_ACTIVE_REGIONS);
+
+ mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
/* Merge with existing active regions if possible */
for (i = 0; i < nr_nodemap_entries; i++) {
@@ -3547,27 +3619,68 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
}
/**
- * shrink_active_range - Shrink an existing registered range of PFNs
+ * remove_active_range - Shrink an existing registered range of PFNs
* @nid: The node id the range is on that should be shrunk
- * @old_end_pfn: The old end PFN of the range
- * @new_end_pfn: The new PFN of the range
+ * @start_pfn: The start PFN of the range to remove
+ * @end_pfn: The end PFN of the range to remove
*
* i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept at the end physical page range that has already been
- * registered with add_active_range(). This function allows an arch to shrink
- * an existing registered range.
+ * The map is kept near the end physical page range that has already been
+ * registered. This function allows an arch to shrink an existing registered
+ * range.
*/
-void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
- unsigned long new_end_pfn)
+void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
{
- int i;
+ int i, j;
+ int removed = 0;
+
+ printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
+ nid, start_pfn, end_pfn);
/* Find the old active region end and shrink */
- for_each_active_range_index_in_nid(i, nid)
- if (early_node_map[i].end_pfn == old_end_pfn) {
- early_node_map[i].end_pfn = new_end_pfn;
- break;
+ for_each_active_range_index_in_nid(i, nid) {
+ if (early_node_map[i].start_pfn >= start_pfn &&
+ early_node_map[i].end_pfn <= end_pfn) {
+ /* clear it */
+ early_node_map[i].start_pfn = 0;
+ early_node_map[i].end_pfn = 0;
+ removed = 1;
+ continue;
+ }
+ if (early_node_map[i].start_pfn < start_pfn &&
+ early_node_map[i].end_pfn > start_pfn) {
+ unsigned long temp_end_pfn = early_node_map[i].end_pfn;
+ early_node_map[i].end_pfn = start_pfn;
+ if (temp_end_pfn > end_pfn)
+ add_active_range(nid, end_pfn, temp_end_pfn);
+ continue;
+ }
+ if (early_node_map[i].start_pfn >= start_pfn &&
+ early_node_map[i].end_pfn > end_pfn &&
+ early_node_map[i].start_pfn < end_pfn) {
+ early_node_map[i].start_pfn = end_pfn;
+ continue;
}
+ }
+
+ if (!removed)
+ return;
+
+ /* remove the blank ones */
+ for (i = nr_nodemap_entries - 1; i > 0; i--) {
+ if (early_node_map[i].nid != nid)
+ continue;
+ if (early_node_map[i].end_pfn)
+ continue;
+ /* we found it, get rid of it */
+ for (j = i; j < nr_nodemap_entries - 1; j++)
+ memcpy(&early_node_map[j], &early_node_map[j+1],
+ sizeof(early_node_map[j]));
+ j = nr_nodemap_entries - 1;
+ memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+ nr_nodemap_entries--;
+ }
}
/**
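
The rewritten remove_active_range() above handles three overlap cases against early_node_map[]: full containment (the entry is cleared and later compacted away), overlap at the front (the entry is truncated and, if the hole is interior, the tail is re-registered), and overlap at the back (the entry's start is pushed past the hole). A usage sketch with made-up PFNs:

	static void __init example_reserve_hole(void)
	{
		/* register PFNs [0x1000, 0x8000) on node 0 ... */
		add_active_range(0, 0x1000, 0x8000);

		/*
		 * ... then punch an interior hole; the range is split into
		 * [0x1000, 0x3000) and [0x4000, 0x8000).
		 */
		remove_active_range(0, 0x3000, 0x4000);
	}
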
@@ -3611,7 +3724,7 @@ static void __init sort_node_map(void)
}
/* Find the lowest pfn for a node */
-unsigned long __init find_min_pfn_for_node(unsigned long nid)
+static unsigned long __init find_min_pfn_for_node(int nid)
{
int i;
unsigned long min_pfn = ULONG_MAX;
@@ -3622,7 +3735,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid)
if (min_pfn == ULONG_MAX) {
printk(KERN_WARNING
- "Could not find start_pfn for node %lu\n", nid);
+ "Could not find start_pfn for node %d\n", nid);
return 0;
}
@@ -3683,7 +3796,7 @@ static unsigned long __init early_calculate_totalpages(void)
* memory. When they don't, some nodes will have more kernelcore than
* others
*/
-void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
{
int i, nid;
unsigned long usable_startpfn;
@@ -3878,7 +3991,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
- printk(" %-8s %8lu -> %8lu\n",
+ printk(" %-8s %0#10lx -> %0#10lx\n",
zone_names[i],
arch_zone_lowest_possible_pfn[i],
arch_zone_highest_possible_pfn[i]);
@@ -3894,15 +4007,16 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Print out the early_node_map[] */
printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
for (i = 0; i < nr_nodemap_entries; i++)
- printk(" %3d: %8lu -> %8lu\n", early_node_map[i].nid,
+ printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
early_node_map[i].start_pfn,
early_node_map[i].end_pfn);
/* Initialise every node */
+ mminit_verify_pageflags_layout();
setup_nr_node_ids();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
- free_area_init_node(nid, pgdat, NULL,
+ free_area_init_node(nid, NULL,
find_min_pfn_for_node(nid), NULL);
/* Any memory on that node */
@@ -3967,15 +4081,13 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-static bootmem_data_t contig_bootmem_data;
-struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
-
+struct pglist_data contig_page_data = { .bdata = &bootmem_node_data[0] };
EXPORT_SYMBOL(contig_page_data);
#endif
void __init free_area_init(unsigned long *zones_size)
{
- free_area_init_node(0, NODE_DATA(0), zones_size,
+ free_area_init_node(0, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}