summaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c112
1 files changed, 59 insertions, 53 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99999a9b2b0..f6b435c8007 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,7 +41,7 @@
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
-#include <linux/compaction.h>
+#include <linux/oom.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -359,7 +359,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
- lock_page_nosync(page);
+ lock_page(page);
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);
unlock_page(page);
@@ -515,7 +515,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
freepage = mapping->a_ops->freepage;
- __remove_from_page_cache(page);
+ __delete_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
@@ -1066,7 +1066,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* surrounding the tag page. Only take those pages of
* the same active state as that tag page. We may safely
* round the target page pfn down to the requested order
- * as the mem_map is guarenteed valid out to MAX_ORDER,
+ * as the mem_map is guaranteed valid out to MAX_ORDER,
* where that page is in a different zone we will detect
* it from its zone id and abort this block scan.
*/
@@ -1271,16 +1271,14 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
spin_lock_irq(&zone->lru_lock);
continue;
}
+ SetPageLRU(page);
lru = page_lru(page);
+ add_page_to_lru_list(zone, page, lru);
if (is_active_lru(lru)) {
int file = is_file_lru(lru);
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
- if (putback_active_lru_page(zone, page))
- continue;
}
- SetPageLRU(page);
- add_page_to_lru_list(zone, page, lru);
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
@@ -1844,16 +1842,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
return false;
- /*
- * If we failed to reclaim and have scanned the full list, stop.
- * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
- * faster but obviously would be less likely to succeed
- * allocation. If this is desirable, use GFP_REPEAT to decide
- * if both reclaimed and scanned should be checked or just
- * reclaimed
- */
- if (!nr_reclaimed && !nr_scanned)
- return false;
+ /* Consider stopping depending on scan and reclaim activity */
+ if (sc->gfp_mask & __GFP_REPEAT) {
+ /*
+ * For __GFP_REPEAT allocations, stop reclaiming if the
+ * full LRU list has been scanned and we are still failing
+ * to reclaim pages. This full LRU scan is potentially
+ * expensive but a __GFP_REPEAT caller really wants to succeed
+ */
+ if (!nr_reclaimed && !nr_scanned)
+ return false;
+ } else {
+ /*
+ * For non-__GFP_REPEAT allocations which can presumably
+ * fail without consequence, stop if we failed to reclaim
+ * any pages from the last SWAP_CLUSTER_MAX number of
+ * pages that were scanned. This will return to the
+ * caller faster at the risk reclaim/compaction and
+ * the resulting allocation attempt fails
+ */
+ if (!nr_reclaimed)
+ return false;
+ }
/*
* If we have not reclaimed enough pages for compaction and the
@@ -1885,12 +1895,12 @@ static void shrink_zone(int priority, struct zone *zone,
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
enum lru_list l;
- unsigned long nr_reclaimed;
+ unsigned long nr_reclaimed, nr_scanned;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
- unsigned long nr_scanned = sc->nr_scanned;
restart:
nr_reclaimed = 0;
+ nr_scanned = sc->nr_scanned;
get_scan_count(zone, sc, nr, priority);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1979,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone)
return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
}
-/*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. It can't handle OOM during hibernation.
- * So let's check zone's unreclaimable in direct reclaim as well as kswapd.
- */
+/* All zones in zonelist are unreclaimable? */
static bool all_unreclaimable(struct zonelist *zonelist,
struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
- bool all_unreclaimable = true;
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -1997,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist,
continue;
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- if (zone_reclaimable(zone)) {
- all_unreclaimable = false;
- break;
- }
+ if (!zone->all_unreclaimable)
+ return false;
}
- return all_unreclaimable;
+ return true;
}
/*
@@ -2086,7 +2089,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct zone *preferred_zone;
first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
- NULL, &preferred_zone);
+ &cpuset_current_mems_allowed,
+ &preferred_zone);
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
}
}
@@ -2098,6 +2102,14 @@ out:
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
+ /*
+ * As hibernation is going on, kswapd is freezed so that it can't mark
+ * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
+ * check.
+ */
+ if (oom_killer_disabled)
+ return 0;
+
/* top priority shrink_zones still had more to do? don't OOM, then */
if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
return 1;
@@ -2214,7 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
* o a 16M DMA zone that is balanced will not balance a zone on any
* reasonable sized machine
* o On all other machines, the top zone must be at least a reasonable
- * precentage of the middle zones. For example, on 32-bit x86, highmem
+ * percentage of the middle zones. For example, on 32-bit x86, highmem
* would need to be at least 256M for it to be balance a whole node.
* Similarly, on x86-64 the Normal zone would need to be at least 1G
* to balance a node on its own. These seemed like reasonable ratios.
@@ -2387,9 +2399,9 @@ loop_again:
* cause too much scanning of the lower zones.
*/
for (i = 0; i <= end_zone; i++) {
- int compaction;
struct zone *zone = pgdat->node_zones + i;
int nr_slab;
+ unsigned long balance_gap;
if (!populated_zone(zone))
continue;
@@ -2406,11 +2418,20 @@ loop_again:
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
/*
- * We put equal pressure on every zone, unless one
- * zone has way too many pages free already.
+ * We put equal pressure on every zone, unless
+ * one zone has way too many pages free
+ * already. The "too many pages" is defined
+ * as the high wmark plus a "gap" where the
+ * gap is either the low watermark or 1%
+ * of the zone, whichever is smaller.
*/
+ balance_gap = min(low_wmark_pages(zone),
+ (zone->present_pages +
+ KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+ KSWAPD_ZONE_BALANCE_GAP_RATIO);
if (!zone_watermark_ok_safe(zone, order,
- 8*high_wmark_pages(zone), end_zone, 0))
+ high_wmark_pages(zone) + balance_gap,
+ end_zone, 0))
shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2418,24 +2439,9 @@ loop_again:
sc.nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
- compaction = 0;
- if (order &&
- zone_watermark_ok(zone, 0,
- high_wmark_pages(zone),
- end_zone, 0) &&
- !zone_watermark_ok(zone, order,
- high_wmark_pages(zone),
- end_zone, 0)) {
- compact_zone_order(zone,
- order,
- sc.gfp_mask, false,
- COMPACT_MODE_KSWAPD);
- compaction = 1;
- }
-
if (zone->all_unreclaimable)
continue;
- if (!compaction && nr_slab == 0 &&
+ if (nr_slab == 0 &&
!zone_reclaimable(zone))
zone->all_unreclaimable = 1;
/*