diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/compaction.c | 10 | ||||
-rw-r--r-- | mm/dmapool.c | 31 | ||||
-rw-r--r-- | mm/memory-failure.c | 8 | ||||
-rw-r--r-- | mm/mempolicy.c | 22 | ||||
-rw-r--r-- | mm/page_alloc.c | 11 | ||||
-rw-r--r-- | mm/shmem.c | 26 | ||||
-rw-r--r-- | mm/sparse.c | 10 | ||||
-rw-r--r-- | mm/vmscan.c | 83 |
8 files changed, 95 insertions, 106 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 9eef55838fc..694eaabaaeb 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -713,7 +713,15 @@ static void isolate_freepages(struct zone *zone, /* Found a block suitable for isolating free pages from */ isolated = 0; - end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + + /* + * As pfn may not start aligned, pfn+pageblock_nr_page + * may cross a MAX_ORDER_NR_PAGES boundary and miss + * a pfn_valid check. Ensure isolate_freepages_block() + * only scans within a pageblock + */ + end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + end_pfn = min(end_pfn, zone_end_pfn); isolated = isolate_freepages_block(cc, pfn, end_pfn, freelist, false); nr_freepages += isolated; diff --git a/mm/dmapool.c b/mm/dmapool.c index c5ab33bca0a..da1b0f0b870 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -50,7 +50,6 @@ struct dma_pool { /* the pool */ size_t allocation; size_t boundary; char name[32]; - wait_queue_head_t waitq; struct list_head pools; }; @@ -62,8 +61,6 @@ struct dma_page { /* cacheable header for 'allocation' bytes */ unsigned int offset; }; -#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000) - static DEFINE_MUTEX(pools_lock); static ssize_t @@ -172,7 +169,6 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev, retval->size = size; retval->boundary = boundary; retval->allocation = allocation; - init_waitqueue_head(&retval->waitq); if (dev) { int ret; @@ -227,7 +223,6 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags) memset(page->vaddr, POOL_POISON_FREED, pool->allocation); #endif pool_initialise_page(pool, page); - list_add(&page->page_list, &pool->page_list); page->in_use = 0; page->offset = 0; } else { @@ -315,30 +310,21 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, might_sleep_if(mem_flags & __GFP_WAIT); spin_lock_irqsave(&pool->lock, flags); - restart: list_for_each_entry(page, &pool->page_list, page_list) { if (page->offset < pool->allocation) goto ready; } - page = pool_alloc_page(pool, GFP_ATOMIC); - if (!page) { - if (mem_flags & __GFP_WAIT) { - DECLARE_WAITQUEUE(wait, current); - __set_current_state(TASK_UNINTERRUPTIBLE); - __add_wait_queue(&pool->waitq, &wait); - spin_unlock_irqrestore(&pool->lock, flags); + /* pool_alloc_page() might sleep, so temporarily drop &pool->lock */ + spin_unlock_irqrestore(&pool->lock, flags); - schedule_timeout(POOL_TIMEOUT_JIFFIES); + page = pool_alloc_page(pool, mem_flags); + if (!page) + return NULL; - spin_lock_irqsave(&pool->lock, flags); - __remove_wait_queue(&pool->waitq, &wait); - goto restart; - } - retval = NULL; - goto done; - } + spin_lock_irqsave(&pool->lock, flags); + list_add(&page->page_list, &pool->page_list); ready: page->in_use++; offset = page->offset; @@ -348,7 +334,6 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, #ifdef DMAPOOL_DEBUG memset(retval, POOL_POISON_ALLOCATED, pool->size); #endif - done: spin_unlock_irqrestore(&pool->lock, flags); return retval; } @@ -435,8 +420,6 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma) page->in_use--; *(int *)vaddr = page->offset; page->offset = offset; - if (waitqueue_active(&pool->waitq)) - wake_up_locked(&pool->waitq); /* * Resist a temptation to do * if (!is_page_busy(page)) pool_free_page(pool, page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6c5899b9034..8b20278be6a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1476,9 +1476,17 @@ int soft_offline_page(struct page *page, int flags) { int ret; unsigned long pfn = page_to_pfn(page); + struct page *hpage = compound_trans_head(page); if (PageHuge(page)) return soft_offline_huge_page(page, flags); + if (PageTransHuge(hpage)) { + if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { + pr_info("soft offline: %#lx: failed to split THP\n", + pfn); + return -EBUSY; + } + } ret = get_any_page(page, pfn, flags); if (ret < 0) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d04a8a54c29..4ea600da894 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) return new; } -/* - * If *frompol needs [has] an extra ref, copy *frompol to *tompol , - * eliminate the * MPOL_F_* flags that require conditional ref and - * [NOTE!!!] drop the extra ref. Not safe to reference *frompol directly - * after return. Use the returned value. - * - * Allows use of a mempolicy for, e.g., multiple allocations with a single - * policy lookup, even if the policy needs/has extra ref on lookup. - * shmem_readahead needs this. - */ -struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol, - struct mempolicy *frompol) -{ - if (!mpol_needs_cond_ref(frompol)) - return frompol; - - *tompol = *frompol; - tompol->flags &= ~MPOL_F_SHARED; /* copy doesn't need unref */ - __mpol_put(frompol); - return tompol; -} - /* Slow path of a mempolicy comparison */ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7bb35ac0964..7e208f0ad68 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1405,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) mt = get_pageblock_migratetype(page); if (unlikely(mt != MIGRATE_ISOLATE)) - __mod_zone_freepage_state(zone, -(1UL << order), mt); + __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); if (alloc_order != order) expand(zone, page, alloc_order, order, @@ -1422,7 +1422,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) } } - return 1UL << order; + return 1UL << alloc_order; } /* @@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; restart: - wake_all_kswapd(order, zonelist, high_zoneidx, - zone_idx(preferred_zone)); + if (!(gfp_mask & __GFP_NO_KSWAPD)) + wake_all_kswapd(order, zonelist, high_zoneidx, + zone_idx(preferred_zone)); /* * OK, we're below the kswapd watermark and have kicked background @@ -2494,7 +2495,7 @@ rebalance: * system then fail the allocation instead of entering direct reclaim. */ if ((deferred_compaction || contended_compaction) && - (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) + (gfp_mask & __GFP_NO_KSWAPD)) goto nopage; /* Try direct reclaim and then allocating */ diff --git a/mm/shmem.c b/mm/shmem.c index 89341b658bd..50c5b8f3a35 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { - struct mempolicy mpol, *spol; struct vm_area_struct pvma; - - spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, index)); + struct page *page; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; /* Bias interleave by inode number to distribute better across nodes */ pvma.vm_pgoff = index + info->vfs_inode.i_ino; pvma.vm_ops = NULL; - pvma.vm_policy = spol; - return swapin_readahead(swap, gfp, &pvma, 0); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); + + page = swapin_readahead(swap, gfp, &pvma, 0); + + /* Drop reference taken by mpol_shared_policy_lookup() */ + mpol_cond_put(pvma.vm_policy); + + return page; } static struct page *shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; + struct page *page; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; @@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp_t gfp, pvma.vm_ops = NULL; pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); - /* - * alloc_page_vma() will drop the shared policy reference - */ - return alloc_page_vma(gfp, &pvma, 0); + page = alloc_page_vma(gfp, &pvma, 0); + + /* Drop reference taken by mpol_shared_policy_lookup() */ + mpol_cond_put(pvma.vm_policy); + + return page; } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS diff --git a/mm/sparse.c b/mm/sparse.c index fac95f2888f..a83de2f72b3 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -617,7 +617,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { return; /* XXX: Not implemented yet */ } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { } #else @@ -658,10 +658,11 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) get_order(sizeof(struct page) * nr_pages)); } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; unsigned long magic; + struct page *page = virt_to_page(memmap); for (i = 0; i < nr_pages; i++, page++) { magic = (unsigned long) page->lru.next; @@ -710,13 +711,10 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) */ if (memmap) { - struct page *memmap_page; - memmap_page = virt_to_page(memmap); - nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) >> PAGE_SHIFT; - free_map_bootmem(memmap_page, nr_pages); + free_map_bootmem(memmap, nr_pages); } } diff --git a/mm/vmscan.c b/mm/vmscan.c index 48550c66f1f..b7ed3767564 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2207,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) * Throttle direct reclaimers if backing storage is backed by the network * and the PFMEMALLOC reserve for the preferred node is getting dangerously * depleted. kswapd will continue to make progress and wake the processes - * when the low watermark is reached + * when the low watermark is reached. + * + * Returns true if a fatal signal was delivered during throttling. If this + * happens, the page allocator should not consider triggering the OOM killer. */ -static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, +static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, nodemask_t *nodemask) { struct zone *zone; @@ -2224,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, * processes to block on log_wait_commit(). */ if (current->flags & PF_KTHREAD) - return; + goto out; + + /* + * If a fatal signal is pending, this process should not throttle. + * It should return quickly so it can exit and free its memory + */ + if (fatal_signal_pending(current)) + goto out; /* Check if the pfmemalloc reserves are ok */ first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone); pgdat = zone->zone_pgdat; if (pfmemalloc_watermark_ok(pgdat)) - return; + goto out; /* Account for the throttling */ count_vm_event(PGSCAN_DIRECT_THROTTLE); @@ -2246,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, if (!(gfp_mask & __GFP_FS)) { wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, pfmemalloc_watermark_ok(pgdat), HZ); - return; + + goto check_pending; } /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, pfmemalloc_watermark_ok(pgdat)); + +check_pending: + if (fatal_signal_pending(current)) + return true; + +out: + return false; } unsigned long try_to_free_pages(struct zonelist *zonelist, int order, @@ -2273,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .gfp_mask = sc.gfp_mask, }; - throttle_direct_reclaim(gfp_mask, zonelist, nodemask); - /* - * Do not enter reclaim if fatal signal is pending. 1 is returned so - * that the page allocator does not consider triggering OOM + * Do not enter reclaim if fatal signal was delivered while throttled. + * 1 is returned so that the page allocator does not OOM kill at this + * point. */ - if (fatal_signal_pending(current)) + if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask)) return 1; trace_mm_vmscan_direct_reclaim_begin(order, @@ -2397,6 +2414,19 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc) } while (memcg); } +static bool zone_balanced(struct zone *zone, int order, + unsigned long balance_gap, int classzone_idx) +{ + if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone) + + balance_gap, classzone_idx, 0)) + return false; + + if (COMPACTION_BUILD && order && !compaction_suitable(zone, order)) + return false; + + return true; +} + /* * pgdat_balanced is used when checking if a node is balanced for high-order * allocations. Only zones that meet watermarks and are in a zone allowed @@ -2475,8 +2505,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, continue; } - if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), - i, 0)) + if (!zone_balanced(zone, order, 0, i)) all_zones_ok = false; else balanced += zone->present_pages; @@ -2585,8 +2614,7 @@ loop_again: break; } - if (!zone_watermark_ok_safe(zone, order, - high_wmark_pages(zone), 0, 0)) { + if (!zone_balanced(zone, order, 0, 0)) { end_zone = i; break; } else { @@ -2662,9 +2690,8 @@ loop_again: testorder = 0; if ((buffer_heads_over_limit && is_highmem_idx(i)) || - !zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone) + balance_gap, - end_zone, 0)) { + !zone_balanced(zone, testorder, + balance_gap, end_zone)) { shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; @@ -2691,8 +2718,7 @@ loop_again: continue; } - if (!zone_watermark_ok_safe(zone, testorder, - high_wmark_pages(zone), end_zone, 0)) { + if (!zone_balanced(zone, testorder, 0, end_zone)) { all_zones_ok = 0; /* * We are still under min water mark. This @@ -2797,29 +2823,10 @@ out: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && - sc.priority != DEF_PRIORITY) - continue; - - /* Would compaction fail due to lack of free memory? */ - if (COMPACTION_BUILD && - compaction_suitable(zone, order) == COMPACT_SKIPPED) - goto loop_again; - - /* Confirm the zone is balanced for order-0 */ - if (!zone_watermark_ok(zone, 0, - high_wmark_pages(zone), 0, 0)) { - order = sc.order = 0; - goto loop_again; - } - /* Check if the memory needs to be defragmented. */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), *classzone_idx, 0)) zones_need_compaction = 0; - - /* If balanced, clear the congested flag */ - zone_clear_flag(zone, ZONE_CONGESTED); } if (zones_need_compaction) |