From 49255c619fbd482d704289b5eb2795f8e3b7ff2e Mon Sep 17 00:00:00 2001 From: Mel Gorman <mel@csn.ul.ie> Date: Tue, 16 Jun 2009 15:31:58 -0700 Subject: page allocator: move check for disabled anti-fragmentation out of fastpath On low-memory systems, anti-fragmentation gets disabled as there is nothing it can do and it would just incur overhead shuffling pages between lists constantly. Currently the check is made in the free page fast path for every page. This patch moves it to a slow path. On machines with low memory, there will be a small amount of additional overhead as pages get shuffled between lists but it should quickly settle. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/mmzone.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a47c879e130..0aa4445b0b8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -50,9 +50,6 @@ extern int page_group_by_mobility_disabled; static inline int get_pageblock_migratetype(struct page *page) { - if (unlikely(page_group_by_mobility_disabled)) - return MIGRATE_UNMOVABLE; - return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); } -- cgit v1.2.3-70-g09d2 From 418589663d6011de9006425b6c5721e1544fb47a Mon Sep 17 00:00:00 2001 From: Mel Gorman <mel@csn.ul.ie> Date: Tue, 16 Jun 2009 15:32:12 -0700 Subject: page allocator: use allocation flags as an index to the zone watermark ALLOC_WMARK_MIN, ALLOC_WMARK_LOW and ALLOC_WMARK_HIGH determine whether pages_min, pages_low or pages_high is used as the zone watermark when allocating the pages. Two branches in the allocator hotpath determine which watermark to use. This patch uses the flags as an array index into a watermark array that is indexed with WMARK_* defines accessed via helpers. All call sites that use zone->pages_* are updated to use the helpers for accessing the values and the array offsets for setting.
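As a minimal standalone sketch of the indexing trick (an illustrative userspace toy, not part of the patch; the watermark values and the extra 0x20 flag bit are made up for the demo):

	#include <stdio.h>

	enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, NR_WMARK };

	#define ALLOC_WMARK_MIN		WMARK_MIN	/* 0x00 */
	#define ALLOC_WMARK_LOW		WMARK_LOW	/* 0x01 */
	#define ALLOC_WMARK_HIGH	WMARK_HIGH	/* 0x02 */
	#define ALLOC_NO_WATERMARKS	0x04	/* first flag past the index bits */
	#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS - 1)

	int main(void)
	{
		unsigned long watermark[NR_WMARK] = { 32, 40, 48 };
		int alloc_flags = ALLOC_WMARK_LOW | 0x20;	/* another flag set too */

		/* one array lookup replaces the old if/else if/else chain */
		printf("mark = %lu\n", watermark[alloc_flags & ALLOC_WMARK_MASK]);
		return 0;
	}

This prints "mark = 40": the low bits of alloc_flags select the watermark directly, and any higher flag bits are masked off before indexing.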
Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/sysctl/vm.txt | 11 +++++----- Documentation/vm/balance | 18 ++++++++-------- arch/m32r/mm/discontig.c | 6 +++--- include/linux/mmzone.h | 16 +++++++++++++- mm/page_alloc.c | 51 +++++++++++++++++++++++---------------------- mm/vmscan.c | 39 ++++++++++++++++++---------------- mm/vmstat.c | 6 +++--- 7 files changed, 83 insertions(+), 64 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 6fab2dcbb4d..0ea5adbc5b1 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -233,8 +233,8 @@ These protections are added to score to judge whether this zone should be used for page allocation or should be reclaimed. In this example, if normal pages (index=2) are required to this DMA zone and -pages_high is used for watermark, the kernel judges this zone should not be -used because pages_free(1355) is smaller than watermark + protection[2] +watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should +not be used because pages_free(1355) is smaller than watermark + protection[2] (4 + 2004 = 2008). If this protection value is 0, this zone would be used for normal page requirement. If requirement is DMA zone(index=0), protection[0] (=0) is used. @@ -280,9 +280,10 @@ The default value is 65536. min_free_kbytes: This is used to force the Linux VM to keep a minimum number -of kilobytes free. The VM uses this number to compute a pages_min -value for each lowmem zone in the system. Each lowmem zone gets -a number of reserved free pages based proportionally on its size. +of kilobytes free. The VM uses this number to compute a +watermark[WMARK_MIN] value for each lowmem zone in the system. +Each lowmem zone gets a number of reserved free pages based +proportionally on its size. Some minimal amount of memory is needed to satisfy PF_MEMALLOC allocations; if you set this to lower than 1024KB, your system will diff --git a/Documentation/vm/balance b/Documentation/vm/balance index bd3d31bc491..c46e68cf934 100644 --- a/Documentation/vm/balance +++ b/Documentation/vm/balance @@ -75,15 +75,15 @@ Page stealing from process memory and shm is done if stealing the page would alleviate memory pressure on any zone in the page's node that has fallen below its watermark. -pages_min/pages_low/pages_high/low_on_memory/zone_wake_kswapd: These are -per-zone fields, used to determine when a zone needs to be balanced. When -the number of pages falls below pages_min, the hysteric field low_on_memory -gets set. This stays set till the number of free pages becomes pages_high. -When low_on_memory is set, page allocation requests will try to free some -pages in the zone (providing GFP_WAIT is set in the request). Orthogonal -to this, is the decision to poke kswapd to free some zone pages. That -decision is not hysteresis based, and is done when the number of free -pages is below pages_low; in which case zone_wake_kswapd is also set. 
+watermark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These +are per-zone fields, used to determine when a zone needs to be balanced. When +the number of pages falls below watermark[WMARK_MIN], the hysteric field +low_on_memory gets set. This stays set till the number of free pages becomes +watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will +try to free some pages in the zone (providing GFP_WAIT is set in the request). +Orthogonal to this, is the decision to poke kswapd to free some zone pages. +That decision is not hysteresis based, and is done when the number of free +pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set. (Good) Ideas that I have heard: diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c index 7daf897292c..b7a78ad429b 100644 --- a/arch/m32r/mm/discontig.c +++ b/arch/m32r/mm/discontig.c @@ -154,9 +154,9 @@ unsigned long __init zone_sizes_init(void) * Use all area of internal RAM. * see __alloc_pages() */ - NODE_DATA(1)->node_zones->pages_min = 0; - NODE_DATA(1)->node_zones->pages_low = 0; - NODE_DATA(1)->node_zones->pages_high = 0; + NODE_DATA(1)->node_zones->watermark[WMARK_MIN] = 0; + NODE_DATA(1)->node_zones->watermark[WMARK_LOW] = 0; + NODE_DATA(1)->node_zones->watermark[WMARK_HIGH] = 0; return holes; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0aa4445b0b8..dd8487f0442 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -163,6 +163,17 @@ static inline int is_unevictable_lru(enum lru_list l) #endif } +enum zone_watermarks { + WMARK_MIN, + WMARK_LOW, + WMARK_HIGH, + NR_WMARK +}; + +#define min_wmark_pages(z) (z->watermark[WMARK_MIN]) +#define low_wmark_pages(z) (z->watermark[WMARK_LOW]) +#define high_wmark_pages(z) (z->watermark[WMARK_HIGH]) + struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ @@ -275,7 +286,10 @@ struct zone_reclaim_stat { struct zone { /* Fields commonly accessed by the page allocator */ - unsigned long pages_min, pages_low, pages_high; + + /* zone watermarks, access with *_wmark_pages(zone) macros */ + unsigned long watermark[NR_WMARK]; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8485735fc69..abe26003124 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1150,10 +1150,15 @@ failed: return NULL; } -#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ -#define ALLOC_WMARK_MIN 0x02 /* use pages_min watermark */ -#define ALLOC_WMARK_LOW 0x04 /* use pages_low watermark */ -#define ALLOC_WMARK_HIGH 0x08 /* use pages_high watermark */ +/* The ALLOC_WMARK bits are used as an index to zone->watermark */ +#define ALLOC_WMARK_MIN WMARK_MIN +#define ALLOC_WMARK_LOW WMARK_LOW +#define ALLOC_WMARK_HIGH WMARK_HIGH +#define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ + +/* Mask to get the watermark bits */ +#define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) + #define ALLOC_HARDER 0x10 /* try to alloc harder */ #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ @@ -1440,14 +1445,10 @@ zonelist_scan: !cpuset_zone_allowed_softwall(zone, gfp_mask)) goto try_next_zone; + BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; - if (alloc_flags & ALLOC_WMARK_MIN) - mark =
zone->pages_min; - else if (alloc_flags & ALLOC_WMARK_LOW) - mark = zone->pages_low; - else - mark = zone->pages_high; + mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; if (!zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) { if (!zone_reclaim_mode || @@ -1959,7 +1960,7 @@ static unsigned int nr_free_zone_pages(int offset) for_each_zone_zonelist(zone, z, zonelist, offset) { unsigned long size = zone->present_pages; - unsigned long high = zone->pages_high; + unsigned long high = high_wmark_pages(zone); if (size > high) sum += size - high; } @@ -2096,9 +2097,9 @@ void show_free_areas(void) "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), - K(zone->pages_min), - K(zone->pages_low), - K(zone->pages_high), + K(min_wmark_pages(zone)), + K(low_wmark_pages(zone)), + K(high_wmark_pages(zone)), K(zone_page_state(zone, NR_ACTIVE_ANON)), K(zone_page_state(zone, NR_INACTIVE_ANON)), K(zone_page_state(zone, NR_ACTIVE_FILE)), @@ -2702,8 +2703,8 @@ static inline unsigned long wait_table_bits(unsigned long size) /* * Mark a number of pageblocks as MIGRATE_RESERVE. The number - * of blocks reserved is based on zone->pages_min. The memory within the - * reserve will tend to store contiguous free pages. Setting min_free_kbytes + * of blocks reserved is based on min_wmark_pages(zone). The memory within + * the reserve will tend to store contiguous free pages. Setting min_free_kbytes * higher will lead to a bigger reserve which will get freed as contiguous * blocks as reclaim kicks in */ @@ -2716,7 +2717,7 @@ static void setup_zone_migrate_reserve(struct zone *zone) /* Get the start pfn, end pfn and the number of blocks to reserve */ start_pfn = zone->zone_start_pfn; end_pfn = start_pfn + zone->spanned_pages; - reserve = roundup(zone->pages_min, pageblock_nr_pages) >> + reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> pageblock_order; for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { @@ -4319,8 +4320,8 @@ static void calculate_totalreserve_pages(void) max = zone->lowmem_reserve[j]; } - /* we treat pages_high as reserved pages. */ - max += zone->pages_high; + /* we treat the high watermark as reserved pages. */ + max += high_wmark_pages(zone); if (max > zone->present_pages) max = zone->present_pages; @@ -4400,7 +4401,7 @@ void setup_per_zone_pages_min(void) * need highmem pages, so cap pages_min to a small * value here. * - * The (pages_high-pages_low) and (pages_low-pages_min) + * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) * deltas controls asynch page reclaim, and so should * not be capped for highmem. */ @@ -4411,17 +4412,17 @@ void setup_per_zone_pages_min(void) min_pages = SWAP_CLUSTER_MAX; if (min_pages > 128) min_pages = 128; - zone->pages_min = min_pages; + zone->watermark[WMARK_MIN] = min_pages; } else { /* * If it's a lowmem zone, reserve a number of pages * proportionate to the zone's size. */ - zone->pages_min = tmp; + zone->watermark[WMARK_MIN] = tmp; } - zone->pages_low = zone->pages_min + (tmp >> 2); - zone->pages_high = zone->pages_min + (tmp >> 1); + zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); + zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -4566,7 +4567,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, * whenever sysctl_lowmem_reserve_ratio changes. * * The reserve ratio obviously has absolutely no relation with the - * pages_min watermarks. 
The lowmem reserve ratio can only make sense + * minimum watermarks. The lowmem reserve ratio can only make sense * if in function of the boot time zone sizes. */ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, diff --git a/mm/vmscan.c b/mm/vmscan.c index a6b7d14812e..e5245d05164 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1401,7 +1401,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= zone->pages_high)) { + if (unlikely(file + free <= high_wmark_pages(zone))) { percent[0] = 100; percent[1] = 0; return; @@ -1533,11 +1533,13 @@ static void shrink_zone(int priority, struct zone *zone, * try to reclaim pages from zones which will satisfy the caller's allocation * request. * - * We reclaim from a zone even if that zone is over pages_high. Because: + * We reclaim from a zone even if that zone is over high_wmark_pages(zone). + * Because: * a) The caller may be trying to free *extra* pages to satisfy a higher-order * allocation or - * b) The zones may be over pages_high but they must go *over* pages_high to - * satisfy the `incremental min' zone defense algorithm. + * b) The target zone may be at high_wmark_pages(zone) but the lower zones + * must go *over* high_wmark_pages(zone) to satisfy the `incremental min' + * zone defense algorithm. * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. @@ -1743,7 +1745,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, /* * For kswapd, balance_pgdat() will work across all this node's zones until - * they are all at pages_high. + * they are all at high_wmark_pages(zone). * * Returns the number of pages which were actually freed. * @@ -1756,11 +1758,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, * the zone for when the problem goes away. * * kswapd scans the zones in the highmem->normal->dma direction. It skips - * zones which have free_pages > pages_high, but once a zone is found to have - * free_pages <= pages_high, we scan that zone and the lower zones regardless - * of the number of free pages in the lower zones. This interoperates with - * the page allocator fallback scheme to ensure that aging of pages is balanced - * across the zones. + * zones which have free_pages > high_wmark_pages(zone), but once a zone is + * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the + * lower zones regardless of the number of free pages in the lower zones. This + * interoperates with the page allocator fallback scheme to ensure that aging + * of pages is balanced across the zones. */ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) { @@ -1781,7 +1783,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) }; /* * temp_priority is used to remember the scanning priority at which - * this zone was successfully refilled to free_pages == pages_high. + * this zone was successfully refilled to + * free_pages == high_wmark_pages(zone). 
*/ int temp_priority[MAX_NR_ZONES]; @@ -1826,8 +1829,8 @@ loop_again: shrink_active_list(SWAP_CLUSTER_MAX, zone, &sc, priority, 0); - if (!zone_watermark_ok(zone, order, zone->pages_high, - 0, 0)) { + if (!zone_watermark_ok(zone, order, + high_wmark_pages(zone), 0, 0)) { end_zone = i; break; } @@ -1861,8 +1864,8 @@ loop_again: priority != DEF_PRIORITY) continue; - if (!zone_watermark_ok(zone, order, zone->pages_high, - end_zone, 0)) + if (!zone_watermark_ok(zone, order, + high_wmark_pages(zone), end_zone, 0)) all_zones_ok = 0; temp_priority[i] = priority; sc.nr_scanned = 0; @@ -1871,8 +1874,8 @@ loop_again: * We put equal pressure on every zone, unless one * zone has way too many pages free already. */ - if (!zone_watermark_ok(zone, order, 8*zone->pages_high, - end_zone, 0)) + if (!zone_watermark_ok(zone, order, + 8*high_wmark_pages(zone), end_zone, 0)) shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, @@ -2038,7 +2041,7 @@ void wakeup_kswapd(struct zone *zone, int order) return; pgdat = zone->zone_pgdat; - if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) + if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) return; if (pgdat->kswapd_max_order < order) pgdat->kswapd_max_order = order; diff --git a/mm/vmstat.c b/mm/vmstat.c index 74d66dba0cb..415110772c7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -714,9 +714,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n spanned %lu" "\n present %lu", zone_page_state(zone, NR_FREE_PAGES), - zone->pages_min, - zone->pages_low, - zone->pages_high, + min_wmark_pages(zone), + low_wmark_pages(zone), + high_wmark_pages(zone), zone->pages_scanned, zone->lru[LRU_ACTIVE_ANON].nr_scan, zone->lru[LRU_INACTIVE_ANON].nr_scan, -- cgit v1.2.3-70-g09d2 From 6e08a369ee10b361ac1cdcdf4fabd420fd08beb3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang <fengguang.wu@intel.com> Date: Tue, 16 Jun 2009 15:32:29 -0700 Subject: vmscan: cleanup the scan batching code The vmscan batching logic is convoluted. Move it into a standalone function nr_scan_try_batch() and document it. No behavior change.
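To see the batching behaviour in isolation, here is a standalone toy built around the same logic as the nr_scan_try_batch() added below (the request size of 10 and the swap_cluster_max of 32 are made-up demo values):

	#include <stdio.h>

	static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
					       unsigned long *nr_saved_scan,
					       unsigned long swap_cluster_max)
	{
		unsigned long nr;

		*nr_saved_scan += nr_to_scan;
		nr = *nr_saved_scan;

		if (nr >= swap_cluster_max)
			*nr_saved_scan = 0;	/* batch full: release it all */
		else
			nr = 0;			/* too small: keep accumulating */

		return nr;
	}

	int main(void)
	{
		unsigned long saved = 0;
		int i;

		for (i = 0; i < 4; i++)
			printf("%lu ", nr_scan_try_batch(10, &saved, 32));
		printf("\n");	/* prints: 0 0 0 40 */
		return 0;
	}

Four requests of 10 pages yield 0, 0, 0 and then 40: nothing is scanned until the deposits add up to at least swap_cluster_max pages, at which point the whole accumulated batch is handed back at once.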
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Nick Piggin <npiggin@suse.de> Cc: Christoph Lameter <cl@linux-foundation.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/mmzone.h | 4 ++-- mm/page_alloc.c | 2 +- mm/vmscan.c | 39 ++++++++++++++++++++++++++++----------- mm/vmstat.c | 8 ++++---- 4 files changed, 35 insertions(+), 18 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index dd8487f0442..db976b9f879 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -334,9 +334,9 @@ struct zone { /* Fields commonly accessed by the page reclaim scanner */ spinlock_t lru_lock; - struct { + struct zone_lru { struct list_head list; - unsigned long nr_scan; + unsigned long nr_saved_scan; /* accumulated for batching */ } lru[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 131655cdb6b..e5b8f628d16 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3657,7 +3657,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone_pcp_init(zone); for_each_lru(l) { INIT_LIST_HEAD(&zone->lru[l].list); - zone->lru[l].nr_scan = 0; + zone->lru[l].nr_saved_scan = 0; } zone->reclaim_stat.recent_rotated[0] = 0; zone->reclaim_stat.recent_rotated[1] = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 9673437a545..d4da097533c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1492,6 +1492,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, percent[1] = 100 - percent[0]; } +/* + * Smallish @nr_to_scan's are deposited in @nr_saved_scan, + * until we collected @swap_cluster_max pages to scan. + */ +static unsigned long nr_scan_try_batch(unsigned long nr_to_scan, + unsigned long *nr_saved_scan, + unsigned long swap_cluster_max) +{ + unsigned long nr; + + *nr_saved_scan += nr_to_scan; + nr = *nr_saved_scan; + + if (nr >= swap_cluster_max) + *nr_saved_scan = 0; + else + nr = 0; + + return nr; +} /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 
@@ -1517,14 +1537,11 @@ static void shrink_zone(int priority, struct zone *zone, scan >>= priority; scan = (scan * percent[file]) / 100; } - if (scanning_global_lru(sc)) { - zone->lru[l].nr_scan += scan; - nr[l] = zone->lru[l].nr_scan; - if (nr[l] >= swap_cluster_max) - zone->lru[l].nr_scan = 0; - else - nr[l] = 0; - } else + if (scanning_global_lru(sc)) + nr[l] = nr_scan_try_batch(scan, + &zone->lru[l].nr_saved_scan, + swap_cluster_max); + else nr[l] = scan; } @@ -2124,11 +2141,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, l == LRU_ACTIVE_FILE)) continue; - zone->lru[l].nr_scan += (lru_pages >> prio) + 1; - if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { + zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; + if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { unsigned long nr_to_scan; - zone->lru[l].nr_scan = 0; + zone->lru[l].nr_saved_scan = 0; nr_to_scan = min(nr_pages, lru_pages); nr_reclaimed += shrink_list(l, nr_to_scan, zone, sc, prio); diff --git a/mm/vmstat.c b/mm/vmstat.c index 415110772c7..84c05555691 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -718,10 +718,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, low_wmark_pages(zone), high_wmark_pages(zone), zone->pages_scanned, - zone->lru[LRU_ACTIVE_ANON].nr_scan, - zone->lru[LRU_INACTIVE_ANON].nr_scan, - zone->lru[LRU_ACTIVE_FILE].nr_scan, - zone->lru[LRU_INACTIVE_FILE].nr_scan, + zone->lru[LRU_ACTIVE_ANON].nr_saved_scan, + zone->lru[LRU_INACTIVE_ANON].nr_saved_scan, + zone->lru[LRU_ACTIVE_FILE].nr_saved_scan, + zone->lru[LRU_INACTIVE_FILE].nr_saved_scan, zone->spanned_pages, zone->present_pages); -- cgit v1.2.3-70-g09d2 From 6837765963f1723e80ca97b1fae660f3a60d77df Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Date: Tue, 16 Jun 2009 15:32:51 -0700 Subject: mm: remove CONFIG_UNEVICTABLE_LRU config option Currently, nobody wants to turn UNEVICTABLE_LRU off. Thus this configurability is unnecessary. 
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Andi Kleen <andi@firstfloor.org> Acked-by: Minchan Kim <minchan.kim@gmail.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Matt Mackall <mpm@selenic.com> Cc: Rik van Riel <riel@redhat.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/base/node.c | 4 ---- fs/proc/meminfo.c | 4 ---- fs/proc/page.c | 2 -- include/linux/mmzone.h | 13 ------------- include/linux/page-flags.h | 16 +--------------- include/linux/pagemap.h | 12 ------------ include/linux/rmap.h | 7 ------- include/linux/swap.h | 19 ------------------- include/linux/vmstat.h | 2 -- kernel/sysctl.c | 2 -- mm/Kconfig | 14 +------------- mm/internal.h | 6 ------ mm/mlock.c | 22 ---------------------- mm/page_alloc.c | 9 --------- mm/rmap.c | 3 +-- mm/vmscan.c | 17 ----------------- mm/vmstat.c | 4 ---- 17 files changed, 3 insertions(+), 153 deletions(-) (limited to 'include/linux/mmzone.h') diff --git a/drivers/base/node.c b/drivers/base/node.c index 40b809742a1..91d4087b403 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -72,10 +72,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d Inactive(anon): %8lu kB\n" "Node %d Active(file): %8lu kB\n" "Node %d Inactive(file): %8lu kB\n" -#ifdef CONFIG_UNEVICTABLE_LRU "Node %d Unevictable: %8lu kB\n" "Node %d Mlocked: %8lu kB\n" -#endif #ifdef CONFIG_HIGHMEM "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" @@ -105,10 +103,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_INACTIVE_ANON)), nid, K(node_page_state(nid, NR_ACTIVE_FILE)), nid, K(node_page_state(nid, NR_INACTIVE_FILE)), -#ifdef CONFIG_UNEVICTABLE_LRU nid, K(node_page_state(nid, NR_UNEVICTABLE)), nid, K(node_page_state(nid, NR_MLOCK)), -#endif #ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c6b0302af4c..d5c410d47fa 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -64,10 +64,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Inactive(anon): %8lu kB\n" "Active(file): %8lu kB\n" "Inactive(file): %8lu kB\n" -#ifdef CONFIG_UNEVICTABLE_LRU "Unevictable: %8lu kB\n" "Mlocked: %8lu kB\n" -#endif #ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" @@ -109,10 +107,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(pages[LRU_INACTIVE_ANON]), K(pages[LRU_ACTIVE_FILE]), K(pages[LRU_INACTIVE_FILE]), -#ifdef CONFIG_UNEVICTABLE_LRU K(pages[LRU_UNEVICTABLE]), K(global_page_state(NR_MLOCK)), -#endif #ifdef CONFIG_HIGHMEM K(i.totalhigh), K(i.freehigh), diff --git a/fs/proc/page.c b/fs/proc/page.c index 9d926bd279a..2707c6c7a20 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -172,10 +172,8 @@ static u64 get_uflags(struct page *page) u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); -#ifdef CONFIG_UNEVICTABLE_LRU u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); -#endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index db976b9f879..88959853737 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -83,13 +83,8 @@ enum zone_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, 
/* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ -#ifdef CONFIG_UNEVICTABLE_LRU NR_UNEVICTABLE, /* " " " " " */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ -#else - NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */ - NR_MLOCK = NR_ACTIVE_FILE, -#endif NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ @@ -132,11 +127,7 @@ enum lru_list { LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, -#ifdef CONFIG_UNEVICTABLE_LRU LRU_UNEVICTABLE, -#else - LRU_UNEVICTABLE = LRU_ACTIVE_FILE, /* avoid compiler errors in dead code */ -#endif NR_LRU_LISTS }; @@ -156,11 +147,7 @@ static inline int is_active_lru(enum lru_list l) static inline int is_unevictable_lru(enum lru_list l) { -#ifdef CONFIG_UNEVICTABLE_LRU return (l == LRU_UNEVICTABLE); -#else - return 0; -#endif } enum zone_watermarks { diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 62214c7d2d9..d6792f88a17 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -95,9 +95,7 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_buddy, /* Page is free, on buddy lists */ PG_swapbacked, /* Page is backed by RAM/swap */ -#ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ -#endif #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif @@ -248,14 +246,8 @@ PAGEFLAG_FALSE(SwapCache) SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache) #endif -#ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) -#else -PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) - SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) - __CLEARPAGEFLAG_NOOP(Unevictable) -#endif #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 @@ -382,12 +374,6 @@ static inline void __ClearPageTail(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ -#ifdef CONFIG_UNEVICTABLE_LRU -#define __PG_UNEVICTABLE (1 << PG_unevictable) -#else -#define __PG_UNEVICTABLE 0 -#endif - #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define __PG_MLOCKED (1 << PG_mlocked) #else @@ -403,7 +389,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED) /* * Flags checked when a page is prepped for return by the page allocator. 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 34da5230faa..aec3252afcf 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -22,9 +22,7 @@ enum mapping_flags { AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */ AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */ -#ifdef CONFIG_UNEVICTABLE_LRU AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */ -#endif }; static inline void mapping_set_error(struct address_space *mapping, int error) @@ -37,8 +35,6 @@ static inline void mapping_set_error(struct address_space *mapping, int error) } } -#ifdef CONFIG_UNEVICTABLE_LRU - static inline void mapping_set_unevictable(struct address_space *mapping) { set_bit(AS_UNEVICTABLE, &mapping->flags); @@ -55,14 +51,6 @@ static inline int mapping_unevictable(struct address_space *mapping) return test_bit(AS_UNEVICTABLE, &mapping->flags); return !!mapping; } -#else -static inline void mapping_set_unevictable(struct address_space *mapping) { } -static inline void mapping_clear_unevictable(struct address_space *mapping) { } -static inline int mapping_unevictable(struct address_space *mapping) -{ - return 0; -} -#endif static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b35bc0e19cd..619379a1dd9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -105,18 +105,11 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int page_mkclean(struct page *); -#ifdef CONFIG_UNEVICTABLE_LRU /* * called in munlock()/munmap() path to check for other vmas holding * the page mlocked. */ int try_to_munlock(struct page *); -#else -static inline int try_to_munlock(struct page *page) -{ - return 0; /* a.k.a. 
SWAP_SUCCESS */ -} -#endif #else /* !CONFIG_MMU */ diff --git a/include/linux/swap.h b/include/linux/swap.h index d476aad3ff5..f30c06908f0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -235,7 +235,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) } #endif -#ifdef CONFIG_UNEVICTABLE_LRU extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); @@ -244,24 +243,6 @@ extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); -#else -static inline int page_evictable(struct page *page, - struct vm_area_struct *vma) -{ - return 1; -} - -static inline void scan_mapping_unevictable_pages(struct address_space *mapping) -{ -} - -static inline int scan_unevictable_register_node(struct node *node) -{ - return 0; -} - -static inline void scan_unevictable_unregister_node(struct node *node) { } -#endif extern int kswapd_run(int nid); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 524cd1b28ec..ff4696c6dce 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -41,7 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, #endif -#ifdef CONFIG_UNEVICTABLE_LRU UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ @@ -50,7 +49,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ UNEVICTABLE_MLOCKFREED, -#endif NR_VM_EVENT_ITEMS }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0e51a35a448..2ccee08f92f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1325,7 +1325,6 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif -#ifdef CONFIG_UNEVICTABLE_LRU { .ctl_name = CTL_UNNUMBERED, .procname = "scan_unevictable_pages", @@ -1334,7 +1333,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &scan_unevictable_handler, }, -#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/mm/Kconfig b/mm/Kconfig index 71830ba7b98..97d2c88b745 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -203,25 +203,13 @@ config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS -config UNEVICTABLE_LRU - bool "Add LRU list to track non-evictable pages" - default y - help - Keeps unevictable pages off of the active and inactive pageout - lists, so kswapd will not waste CPU time or have its balancing - algorithms thrown off by scanning these pages. Selecting this - will use one page flag and increase the code size a little, - say Y unless you know what you are doing. - - See Documentation/vm/unevictable-lru.txt for more information. 
- config HAVE_MLOCK bool default y if MMU=y config HAVE_MLOCKED_PAGE_BIT bool - default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y + default y if HAVE_MLOCK=y config MMU_NOTIFIER bool diff --git a/mm/internal.h b/mm/internal.h index b4ac332e807..f02c7508068 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -73,7 +73,6 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) } #endif -#ifdef CONFIG_UNEVICTABLE_LRU /* * unevictable_migrate_page() called only from migrate_page_copy() to * migrate unevictable flag to new page. @@ -85,11 +84,6 @@ static inline void unevictable_migrate_page(struct page *new, struct page *old) if (TestClearPageUnevictable(old)) SetPageUnevictable(new); } -#else -static inline void unevictable_migrate_page(struct page *new, struct page *old) -{ -} -#endif #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT /* diff --git a/mm/mlock.c b/mm/mlock.c index ac130433c7d..45eb650b965 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -31,7 +31,6 @@ int can_do_mlock(void) } EXPORT_SYMBOL(can_do_mlock); -#ifdef CONFIG_UNEVICTABLE_LRU /* * Mlocked pages are marked with PageMlocked() flag for efficient testing * in vmscan and, possibly, the fault path; and to support semi-accurate @@ -261,27 +260,6 @@ static int __mlock_posix_error_return(long retval) return retval; } -#else /* CONFIG_UNEVICTABLE_LRU */ - -/* - * Just make pages present if VM_LOCKED. No-op if unlocking. - */ -static long __mlock_vma_pages_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end, - int mlock) -{ - if (mlock && (vma->vm_flags & VM_LOCKED)) - return make_pages_present(start, end); - return 0; -} - -static inline int __mlock_posix_error_return(long retval) -{ - return 0; -} - -#endif /* CONFIG_UNEVICTABLE_LRU */ - /** * mlock_vma_pages_range() - mlock pages in specified vma range. * @vma - the vma containing the specfied address range diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 00e293734fc..c95a77cd581 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2077,19 +2077,14 @@ void show_free_areas(void) printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" " inactive_file:%lu" -//TODO: check/adjust line lengths -#ifdef CONFIG_UNEVICTABLE_LRU " unevictable:%lu" -#endif " dirty:%lu writeback:%lu unstable:%lu\n" " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), global_page_state(NR_INACTIVE_FILE), -#ifdef CONFIG_UNEVICTABLE_LRU global_page_state(NR_UNEVICTABLE), -#endif global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), @@ -2113,9 +2108,7 @@ void show_free_areas(void) " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" -#ifdef CONFIG_UNEVICTABLE_LRU " unevictable:%lukB" -#endif " present:%lukB" " pages_scanned:%lu" " all_unreclaimable? %s" @@ -2129,9 +2122,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_INACTIVE_ANON)), K(zone_page_state(zone, NR_ACTIVE_FILE)), K(zone_page_state(zone, NR_INACTIVE_FILE)), -#ifdef CONFIG_UNEVICTABLE_LRU K(zone_page_state(zone, NR_UNEVICTABLE)), -#endif K(zone->present_pages), zone->pages_scanned, (zone_is_all_unreclaimable(zone) ? 
"yes" : "no") diff --git a/mm/rmap.c b/mm/rmap.c index 23122af3261..316c9d6930a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1202,7 +1202,6 @@ int try_to_unmap(struct page *page, int migration) return ret; } -#ifdef CONFIG_UNEVICTABLE_LRU /** * try_to_munlock - try to munlock a page * @page: the page to be munlocked @@ -1226,4 +1225,4 @@ int try_to_munlock(struct page *page) else return try_to_unmap_file(page, 1, 0); } -#endif + diff --git a/mm/vmscan.c b/mm/vmscan.c index 879d034930c..2c4b945b011 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -514,7 +514,6 @@ int remove_mapping(struct address_space *mapping, struct page *page) * * lru_lock must not be held, interrupts must be enabled. */ -#ifdef CONFIG_UNEVICTABLE_LRU void putback_lru_page(struct page *page) { int lru; @@ -568,20 +567,6 @@ redo: put_page(page); /* drop ref from isolate */ } -#else /* CONFIG_UNEVICTABLE_LRU */ - -void putback_lru_page(struct page *page) -{ - int lru; - VM_BUG_ON(PageLRU(page)); - - lru = !!TestClearPageActive(page) + page_is_file_cache(page); - lru_cache_add_lru(page, lru); - put_page(page); -} -#endif /* CONFIG_UNEVICTABLE_LRU */ - - /* * shrink_page_list() returns the number of reclaimed pages */ @@ -2470,7 +2455,6 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) } #endif -#ifdef CONFIG_UNEVICTABLE_LRU /* * page_evictable - test whether a page is evictable * @page: the page to test @@ -2717,4 +2701,3 @@ void scan_unevictable_unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages); } -#endif diff --git a/mm/vmstat.c b/mm/vmstat.c index 1e151cf6bf8..1e3aa8139f2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -629,10 +629,8 @@ static const char * const vmstat_text[] = { "nr_active_anon", "nr_inactive_file", "nr_active_file", -#ifdef CONFIG_UNEVICTABLE_LRU "nr_unevictable", "nr_mlock", -#endif "nr_anon_pages", "nr_mapped", "nr_file_pages", @@ -687,7 +685,6 @@ static const char * const vmstat_text[] = { "htlb_buddy_alloc_success", "htlb_buddy_alloc_fail", #endif -#ifdef CONFIG_UNEVICTABLE_LRU "unevictable_pgs_culled", "unevictable_pgs_scanned", "unevictable_pgs_rescued", @@ -697,7 +694,6 @@ static const char * const vmstat_text[] = { "unevictable_pgs_stranded", "unevictable_pgs_mlockfreed", #endif -#endif }; static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, -- cgit v1.2.3-70-g09d2