Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c   56
1 file changed, 40 insertions, 16 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7ed3767564..adc7e905818 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1177,7 +1177,11 @@ int isolate_lru_page(struct page *page)
 }
 
 /*
- * Are there way too many processes in the direct reclaim path already?
+ * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
+ * then get resheduled. When there are massive number of tasks doing page
+ * allocation, such sleeping direct reclaimers may keep piling up on each CPU,
+ * the LRU list will go small and be scanned faster than necessary, leading to
+ * unnecessary swapping, thrashing and OOM.
  */
 static int too_many_isolated(struct zone *zone, int file,
                 struct scan_control *sc)
@@ -1198,6 +1202,14 @@ static int too_many_isolated(struct zone *zone, int file,
                 isolated = zone_page_state(zone, NR_ISOLATED_ANON);
         }
 
+        /*
+         * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
+         * won't get blocked by normal direct-reclaimers, forming a circular
+         * deadlock.
+         */
+        if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
+                inactive >>= 3;
+
         return isolated > inactive;
 }
 
@@ -1679,13 +1691,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 
         if (global_reclaim(sc)) {
                 free = zone_page_state(zone, NR_FREE_PAGES);
-                /* If we have very few page cache pages,
-                   force-scan anon pages. */
                 if (unlikely(file + free <= high_wmark_pages(zone))) {
+                        /*
+                         * If we have very few page cache pages, force-scan
+                         * anon pages.
+                         */
                         fraction[0] = 1;
                         fraction[1] = 0;
                         denominator = 1;
                         goto out;
+                } else if (!inactive_file_is_low_global(zone)) {
+                        /*
+                         * There is enough inactive page cache, do not
+                         * reclaim anything from the working set right now.
+                         */
+                        fraction[0] = 0;
+                        fraction[1] = 1;
+                        denominator = 1;
+                        goto out;
                 }
         }
 
@@ -1752,7 +1775,7 @@ out:
 /* Use reclaim/compaction for costly allocs or under memory pressure */
 static bool in_reclaim_compaction(struct scan_control *sc)
 {
-        if (COMPACTION_BUILD && sc->order &&
+        if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
                         (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
                          sc->priority < DEF_PRIORITY - 2))
                 return true;
@@ -2005,7 +2028,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         if (zone->all_unreclaimable &&
                                         sc->priority != DEF_PRIORITY)
                                 continue;       /* Let kswapd poll it */
-                        if (COMPACTION_BUILD) {
+                        if (IS_ENABLED(CONFIG_COMPACTION)) {
                                 /*
                                  * If we already have plenty of memory free for
                                  * compaction in this zone, don't free any more.
@@ -2421,7 +2444,8 @@ static bool zone_balanced(struct zone *zone, int order,
                                     balance_gap, classzone_idx, 0))
                 return false;
 
-        if (COMPACTION_BUILD && order && !compaction_suitable(zone, order))
+        if (IS_ENABLED(CONFIG_COMPACTION) && order &&
+            !compaction_suitable(zone, order))
                 return false;
 
         return true;
@@ -2546,7 +2570,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                                         int *classzone_idx)
 {
-        int all_zones_ok;
+        struct zone *unbalanced_zone;
         unsigned long balanced;
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
@@ -2580,7 +2604,7 @@ loop_again:
                 unsigned long lru_pages = 0;
                 int has_under_min_watermark_zone = 0;
 
-                all_zones_ok = 1;
+                unbalanced_zone = NULL;
                 balanced = 0;
 
                 /*
@@ -2684,7 +2708,7 @@ loop_again:
                          * Do not reclaim more than needed for compaction.
                          */
                         testorder = order;
-                        if (COMPACTION_BUILD && order &&
+                        if (IS_ENABLED(CONFIG_COMPACTION) && order &&
                                         compaction_suitable(zone, order) !=
                                                 COMPACT_SKIPPED)
                                 testorder = 0;
@@ -2719,7 +2743,7 @@ loop_again:
                         }
 
                         if (!zone_balanced(zone, testorder, 0, end_zone)) {
-                                all_zones_ok = 0;
+                                unbalanced_zone = zone;
                                 /*
                                  * We are still under min water mark. This
                                  * means that we have a GFP_ATOMIC allocation
@@ -2752,7 +2776,7 @@ loop_again:
                     pfmemalloc_watermark_ok(pgdat))
                         wake_up(&pgdat->pfmemalloc_wait);
 
-                if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
+                if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
                         break;          /* kswapd: all done */
                 /*
                  * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2762,7 +2786,7 @@ loop_again:
                 if (has_under_min_watermark_zone)
                         count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
                 else
-                        congestion_wait(BLK_RW_ASYNC, HZ/10);
+                        wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
         }
 
         /*
@@ -2781,7 +2805,7 @@ out:
          * high-order: Balanced zones must make up at least 25% of the node
          *             for the node to be balanced
          */
-        if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
+        if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) {
                 cond_resched();
 
                 try_to_freeze();
@@ -2951,7 +2975,7 @@ static int kswapd(void *p)
         classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
         balanced_classzone_idx = classzone_idx;
         for ( ; ; ) {
-                int ret;
+                bool ret;
 
                 /*
                  * If the last balance_pgdat was unsuccessful it's unlikely a
@@ -3119,7 +3143,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
         int nid;
 
         if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
-                for_each_node_state(nid, N_HIGH_MEMORY) {
+                for_each_node_state(nid, N_MEMORY) {
                         pg_data_t *pgdat = NODE_DATA(nid);
                         const struct cpumask *mask;
 
@@ -3175,7 +3199,7 @@ static int __init kswapd_init(void)
         int nid;
 
         swap_setup();
-        for_each_node_state(nid, N_HIGH_MEMORY)
+        for_each_node_state(nid, N_MEMORY)
                 kswapd_run(nid);
         hotcpu_notifier(cpu_callback, 0);
         return 0;
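Note (not part of the patch): the least obvious hunk is the throttling rule added to too_many_isolated(). Callers whose gfp_mask allows both IO and FS (GFP_IOFS) have their threshold cut to inactive/8, so they are throttled earlier, while GFP_NOIO/GFP_NOFS reclaimers keep the full inactive count as their limit and therefore cannot pile up behind throttled normal reclaimers and deadlock. The stand-alone C sketch below mirrors only that arithmetic for illustration; the MY_GFP_* constants, the simplified function signature, and the example page counts are stand-ins invented here, not the kernel's __GFP_IO/__GFP_FS definitions or API.

    /* Minimal user-space sketch of the threshold rule, under the assumptions above. */
    #include <stdbool.h>
    #include <stdio.h>

    #define MY_GFP_IO   0x1u                      /* stand-in for __GFP_IO */
    #define MY_GFP_FS   0x2u                      /* stand-in for __GFP_FS */
    #define MY_GFP_IOFS (MY_GFP_IO | MY_GFP_FS)   /* stand-in for GFP_IOFS */

    /*
     * Normal (IO+FS capable) reclaimers are considered "too many isolated"
     * once isolated pages exceed inactive/8; GFP_NOIO/GFP_NOFS callers only
     * once isolated exceeds the full inactive count.
     */
    static bool too_many_isolated_sketch(unsigned long inactive,
                                         unsigned long isolated,
                                         unsigned int gfp_mask)
    {
            if ((gfp_mask & MY_GFP_IOFS) == MY_GFP_IOFS)
                    inactive >>= 3;

            return isolated > inactive;
    }

    int main(void)
    {
            unsigned long inactive = 1000, isolated = 200;   /* example counts */

            /* Normal reclaimer: 200 > 1000/8 = 125, so it would be throttled. */
            printf("GFP_KERNEL-like caller throttled: %d\n",
                   too_many_isolated_sketch(inactive, isolated, MY_GFP_IOFS));

            /* GFP_NOFS-like caller: 200 > 1000 is false, so it keeps going. */
            printf("GFP_NOFS-like caller throttled:   %d\n",
                   too_many_isolated_sketch(inactive, isolated, MY_GFP_IO));

            return 0;
    }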