Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 148
1 file changed, 97 insertions, 51 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b941a..2a362c52fdf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -21,6 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/kmemcheck.h>
@@ -588,13 +589,13 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
 	int migratetype = 0;
 	int batch_free = 0;
+	int to_free = count;
 
 	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
-	while (count) {
+	while (to_free) {
 		struct page *page;
 		struct list_head *list;
 
@@ -619,8 +620,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, zone, 0, page_private(page));
 			trace_mm_page_pcpu_drain(page, 0, page_private(page));
-		} while (--count && --batch_free && !list_empty(list));
+		} while (--to_free && --batch_free && !list_empty(list));
 	}
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
 	spin_unlock(&zone->lock);
 }
 
@@ -631,8 +633,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 	__free_one_page(page, zone, order, migratetype);
+	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 	spin_unlock(&zone->lock);
 }
 
@@ -1461,7 +1463,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
+	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
 	if (alloc_flags & ALLOC_HIGH)
@@ -1738,7 +1740,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	struct page *page;
 
 	/* Acquire the OOM killer lock for the zones in zonelist */
-	if (!try_set_zone_oom(zonelist, gfp_mask)) {
+	if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
 		schedule_timeout_uninterruptible(1);
 		return NULL;
 	}
@@ -1759,6 +1761,9 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		/* The OOM killer will not help higher order allocs */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
 			goto out;
+		/* The OOM killer does not needlessly kill tasks for lowmem */
+		if (high_zoneidx < ZONE_NORMAL)
+			goto out;
 		/*
 		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
 		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -1843,6 +1848,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
+	bool drained = false;
 
 	cond_resched();
 
@@ -1861,14 +1867,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	cond_resched();
 
-	if (order != 0)
-		drain_all_pages();
+	if (unlikely(!(*did_some_progress)))
+		return NULL;
 
-	if (likely(*did_some_progress))
-		page = get_page_from_freelist(gfp_mask, nodemask, order,
+retry:
+	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,
 					alloc_flags, preferred_zone,
 					migratetype);
+
+	/*
+	 * If an allocation failed after direct reclaim, it could be because
+	 * pages are pinned on the per-cpu lists. Drain them and try again
+	 */
+	if (!page && !drained) {
+		drain_all_pages();
+		drained = true;
+		goto retry;
+	}
+
 	return page;
 }
 
@@ -2052,15 +2069,23 @@ rebalance:
 		if (page)
 			goto got_pg;
 
-		/*
-		 * The OOM killer does not trigger for high-order
-		 * ~__GFP_NOFAIL allocations so if no progress is being
-		 * made, there are no other options and retrying is
-		 * unlikely to help.
-		 */
-		if (order > PAGE_ALLOC_COSTLY_ORDER &&
-						!(gfp_mask & __GFP_NOFAIL))
-			goto nopage;
+		if (!(gfp_mask & __GFP_NOFAIL)) {
+			/*
+			 * The oom killer is not called for high-order
+			 * allocations that may fail, so if no progress
+			 * is being made, there are no other options and
+			 * retrying is unlikely to help.
+			 */
+			if (order > PAGE_ALLOC_COSTLY_ORDER)
+				goto nopage;
+			/*
+			 * The oom killer is not called for lowmem
+			 * allocations to prevent needlessly killing
+			 * innocent tasks.
+			 */
+			if (high_zoneidx < ZONE_NORMAL)
+				goto nopage;
+		}
 
 		goto restart;
 	}
@@ -2412,7 +2437,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone_page_state(zone, NR_FREE_PAGES)),
+			K(zone_nr_free_pages(zone)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
@@ -3612,6 +3637,41 @@ void __init free_bootmem_with_active_regions(int nid,
 	}
 }
 
+#ifdef CONFIG_HAVE_MEMBLOCK
+u64 __init find_memory_core_early(int nid, u64 size, u64 align,
+			u64 goal, u64 limit)
+{
+	int i;
+
+	/* Need to go over early_node_map to find out good range for node */
+	for_each_active_range_index_in_nid(i, nid) {
+		u64 addr;
+		u64 ei_start, ei_last;
+		u64 final_start, final_end;
+
+		ei_last = early_node_map[i].end_pfn;
+		ei_last <<= PAGE_SHIFT;
+		ei_start = early_node_map[i].start_pfn;
+		ei_start <<= PAGE_SHIFT;
+
+		final_start = max(ei_start, goal);
+		final_end = min(ei_last, limit);
+
+		if (final_start >= final_end)
+			continue;
+
+		addr = memblock_find_in_range(final_start, final_end, size, align);
+
+		if (addr == MEMBLOCK_ERROR)
+			continue;
+
+		return addr;
+	}
+
+	return MEMBLOCK_ERROR;
+}
+#endif
+
 int __init add_from_early_node_map(struct range *range, int az,
 				   int nr_range, int nid)
 {
@@ -3631,38 +3691,26 @@ int __init add_from_early_node_map(struct range *range, int az,
 void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
-	int i;
 	void *ptr;
+	u64 addr;
 
-	/* need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid(i, nid) {
-		u64 addr;
-		u64 ei_start, ei_last;
-
-		ei_last = early_node_map[i].end_pfn;
-		ei_last <<= PAGE_SHIFT;
-		ei_start = early_node_map[i].start_pfn;
-		ei_start <<= PAGE_SHIFT;
-		addr = find_early_area(ei_start, ei_last,
-					goal, limit, size, align);
-
-		if (addr == -1ULL)
-			continue;
+	if (limit > memblock.current_limit)
+		limit = memblock.current_limit;
 
-#if 0
-		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
-				nid,
-				ei_start, ei_last, goal, limit, size,
-				align, addr);
-#endif
+	addr = find_memory_core_early(nid, size, align, goal, limit);
 
-		ptr = phys_to_virt(addr);
-		memset(ptr, 0, size);
-		reserve_early_without_check(addr, addr + size, "BOOTMEM");
-		return ptr;
-	}
+	if (addr == MEMBLOCK_ERROR)
+		return NULL;
 
-	return NULL;
+	ptr = phys_to_virt(addr);
+	memset(ptr, 0, size);
+	memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
+	/*
+	 * The min_count is set to 0 so that bootmem allocated blocks
+	 * are never reported as leaks.
+	 */
+	kmemleak_alloc(ptr, size, 0, 0);
+	return ptr;
 }
 #endif
 
@@ -4081,8 +4129,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone_seqlock_init(zone);
 		zone->zone_pgdat = pgdat;
 
-		zone->prev_priority = DEF_PRIORITY;
-
 		zone_pcp_init(zone);
 		for_each_lru(l) {
 			INIT_LIST_HEAD(&zone->lru[l].list);
@@ -5152,9 +5198,9 @@ void *__init alloc_large_system_hash(const char *tablename,
 	if (!table)
 		panic("Failed to allocate %s hash table\n", tablename);
 
-	printk(KERN_INFO "%s hash table entries: %d (order: %d, %lu bytes)\n",
+	printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
 	       tablename,
-	       (1U << log2qty),
+	       (1UL << log2qty),
 	       ilog2(size) - PAGE_SHIFT,
 	       size);
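Several of the hunks above concern how accurate the free page count is at the moment it is read: free_pcppages_bulk() and free_one_page() now update NR_FREE_PAGES only after the pages are actually back on the free lists, and zone_watermark_ok() and show_free_areas() read zone_nr_free_pages() instead of the raw counter, which helps compensate for per-CPU counter drift when free memory is low. The stand-alone C sketch below illustrates only the general split-counter idea behind that change; it is not the kernel's vmstat code, and every name in it is made up.

#include <stdio.h>

#define NCPU 4

/*
 * Illustrative only: a split counter with a cheap global value plus
 * unsynced per-CPU deltas. The "snapshot" read folds the deltas in,
 * which is the idea behind preferring zone_nr_free_pages() for
 * watermark checks when the cheap value may have drifted.
 */
struct split_counter {
	long global;
	long percpu_delta[NCPU];
};

static long read_cheap(const struct split_counter *c)
{
	return c->global;			/* fast, but may be stale */
}

static long read_snapshot(const struct split_counter *c)
{
	long v = c->global;

	for (int i = 0; i < NCPU; i++)		/* fold in unsynced deltas */
		v += c->percpu_delta[i];
	return v > 0 ? v : 0;
}

int main(void)
{
	struct split_counter free_pages = {
		.global = 1000,				/* what the raw counter says */
		.percpu_delta = { -120, -80, -40, -60 },/* updates not yet folded in */
	};
	long mark = 900;				/* watermark */

	printf("cheap:    %ld (%s)\n", read_cheap(&free_pages),
	       read_cheap(&free_pages) >= mark ? "above watermark" : "below watermark");
	printf("snapshot: %ld (%s)\n", read_snapshot(&free_pages),
	       read_snapshot(&free_pages) >= mark ? "above watermark" : "below watermark");
	return 0;
}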
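The __alloc_pages_direct_reclaim() hunk adds the other notable pattern: bail out early if reclaim made no progress, and if the freelist allocation still fails afterwards, drain the per-CPU page lists exactly once and retry before giving up. Below is a minimal user-space sketch of that drain-once-and-retry control flow; it is not kernel code, and the helper names are hypothetical stand-ins for drain_all_pages() and get_page_from_freelist().

#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical stand-ins for drain_all_pages() and get_page_from_freelist(). */
static void drain_local_caches(void)
{
	/* Release objects pinned in per-CPU/per-thread caches. */
}

static void *try_alloc(size_t size)
{
	return malloc(size);
}

/*
 * Mirrors the control flow of the patched __alloc_pages_direct_reclaim():
 * give up immediately if reclaim made no progress, otherwise allocate,
 * and on failure drain cached memory a single time before retrying.
 */
static void *alloc_with_one_drain(size_t size, bool made_progress)
{
	bool drained = false;
	void *p;

	if (!made_progress)
		return NULL;

retry:
	p = try_alloc(size);

	/*
	 * The failure may be caused by memory sitting in local caches
	 * rather than genuine exhaustion, so drain once and try again.
	 */
	if (!p && !drained) {
		drain_local_caches();
		drained = true;
		goto retry;
	}
	return p;
}

int main(void)
{
	void *p = alloc_with_one_drain(64, true);

	free(p);
	return 0;
}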