From 4f98a2fee8acdb4ac84545df98cccecfd130f8db Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 18 Oct 2008 20:26:32 -0700 Subject: vmscan: split LRU lists into anon & file sets Split the LRU lists in two, one set for pages that are backed by real file systems ("file") and one for pages that are backed by memory and swap ("anon"). The latter includes tmpfs. The advantage of doing this is that the VM will not have to scan over lots of anonymous pages (which we generally do not want to swap out), just to find the page cache pages that it should evict. This patch has the infrastructure and a basic policy to balance how much we scan the anon lists and how much we scan the file lists. The big policy changes are in separate patches. [lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset] [kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru] [kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page] [hugh@veritas.com: memcg swapbacked pages active] [hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED] [akpm@linux-foundation.org: fix /proc/vmstat units] [nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration] [kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo] [kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()] Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 56 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 5116b78c632..fc7e9bf0cdb 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -61,34 +61,44 @@ static ssize_t node_read_meminfo(struct sys_device * dev, si_meminfo_node(&i, nid); n = sprintf(buf, "\n" - 
"Node %d MemTotal: %8lu kB\n" - "Node %d MemFree: %8lu kB\n" - "Node %d MemUsed: %8lu kB\n" - "Node %d Active: %8lu kB\n" - "Node %d Inactive: %8lu kB\n" + "Node %d MemTotal: %8lu kB\n" + "Node %d MemFree: %8lu kB\n" + "Node %d MemUsed: %8lu kB\n" + "Node %d Active: %8lu kB\n" + "Node %d Inactive: %8lu kB\n" + "Node %d Active(anon): %8lu kB\n" + "Node %d Inactive(anon): %8lu kB\n" + "Node %d Active(file): %8lu kB\n" + "Node %d Inactive(file): %8lu kB\n" #ifdef CONFIG_HIGHMEM - "Node %d HighTotal: %8lu kB\n" - "Node %d HighFree: %8lu kB\n" - "Node %d LowTotal: %8lu kB\n" - "Node %d LowFree: %8lu kB\n" + "Node %d HighTotal: %8lu kB\n" + "Node %d HighFree: %8lu kB\n" + "Node %d LowTotal: %8lu kB\n" + "Node %d LowFree: %8lu kB\n" #endif - "Node %d Dirty: %8lu kB\n" - "Node %d Writeback: %8lu kB\n" - "Node %d FilePages: %8lu kB\n" - "Node %d Mapped: %8lu kB\n" - "Node %d AnonPages: %8lu kB\n" - "Node %d PageTables: %8lu kB\n" - "Node %d NFS_Unstable: %8lu kB\n" - "Node %d Bounce: %8lu kB\n" - "Node %d WritebackTmp: %8lu kB\n" - "Node %d Slab: %8lu kB\n" - "Node %d SReclaimable: %8lu kB\n" - "Node %d SUnreclaim: %8lu kB\n", + "Node %d Dirty: %8lu kB\n" + "Node %d Writeback: %8lu kB\n" + "Node %d FilePages: %8lu kB\n" + "Node %d Mapped: %8lu kB\n" + "Node %d AnonPages: %8lu kB\n" + "Node %d PageTables: %8lu kB\n" + "Node %d NFS_Unstable: %8lu kB\n" + "Node %d Bounce: %8lu kB\n" + "Node %d WritebackTmp: %8lu kB\n" + "Node %d Slab: %8lu kB\n" + "Node %d SReclaimable: %8lu kB\n" + "Node %d SUnreclaim: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), - nid, K(node_page_state(nid, NR_ACTIVE)), - nid, K(node_page_state(nid, NR_INACTIVE)), + nid, K(node_page_state(nid, NR_ACTIVE_ANON) + + node_page_state(nid, NR_ACTIVE_FILE)), + nid, K(node_page_state(nid, NR_INACTIVE_ANON) + + node_page_state(nid, NR_INACTIVE_FILE)), + nid, K(node_page_state(nid, NR_ACTIVE_ANON)), + nid, K(node_page_state(nid, NR_INACTIVE_ANON)), + nid, K(node_page_state(nid, 
NR_ACTIVE_FILE)), + nid, K(node_page_state(nid, NR_INACTIVE_FILE)), #ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), -- cgit v1.2.3-70-g09d2 From 7b854121eb3e5ba0241882ff939e2c485228c9c5 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:40 -0700 Subject: Unevictable LRU Page Statistics Report unevictable pages per zone and system wide. Kosaki Motohiro added support for memory controller unevictable statistics. [riel@redhat.com: fix printk in show_free_areas()] [akpm@linux-foundation.org: fix units in /proc/vmstats] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Debugged-by: Hiroshi Shimamoto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 6 ++++++ fs/proc/proc_misc.c | 6 ++++++ mm/memcontrol.c | 6 ++++++ mm/page_alloc.c | 18 ++++++++++++++++-- mm/vmstat.c | 3 +++ 5 files changed, 37 insertions(+), 2 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index fc7e9bf0cdb..11a9a05cf55 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -70,6 +70,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d Inactive(anon): %8lu kB\n" "Node %d Active(file): %8lu kB\n" "Node %d Inactive(file): %8lu kB\n" +#ifdef CONFIG_UNEVICTABLE_LRU + "Node %d Noreclaim: %8lu kB\n" +#endif #ifdef CONFIG_HIGHMEM "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" @@ -99,6 +102,9 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_INACTIVE_ANON)), nid, K(node_page_state(nid, NR_ACTIVE_FILE)), nid, K(node_page_state(nid, NR_INACTIVE_FILE)), +#ifdef CONFIG_UNEVICTABLE_LRU + nid, K(node_page_state(nid, NR_UNEVICTABLE)), +#endif #ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), nid, K(i.freehigh), diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index b8edb286055..6dd60eaea99 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -174,6 +174,9 @@ 
static int meminfo_read_proc(char *page, char **start, off_t off, "Inactive(anon): %8lu kB\n" "Active(file): %8lu kB\n" "Inactive(file): %8lu kB\n" +#ifdef CONFIG_UNEVICTABLE_LRU + "Unevictable: %8lu kB\n" +#endif #ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" "HighFree: %8lu kB\n" @@ -212,6 +215,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(pages[LRU_INACTIVE_ANON]), K(pages[LRU_ACTIVE_FILE]), K(pages[LRU_INACTIVE_FILE]), +#ifdef CONFIG_UNEVICTABLE_LRU + K(pages[LRU_UNEVICTABLE]), +#endif #ifdef CONFIG_HIGHMEM K(i.totalhigh), K(i.freehigh), diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 82c065e7551..e93a4db93fb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1006,6 +1006,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, { unsigned long active_anon, inactive_anon; unsigned long active_file, inactive_file; + unsigned long unevictable; inactive_anon = mem_cgroup_get_all_zonestat(mem_cont, LRU_INACTIVE_ANON); @@ -1015,10 +1016,15 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, LRU_INACTIVE_FILE); active_file = mem_cgroup_get_all_zonestat(mem_cont, LRU_ACTIVE_FILE); + unevictable = mem_cgroup_get_all_zonestat(mem_cont, + LRU_UNEVICTABLE); + cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE); cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE); cb->fill(cb, "active_file", (active_file) * PAGE_SIZE); cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE); + cb->fill(cb, "unevictable", unevictable * PAGE_SIZE); + } return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 79c0981b1d3..4125230a1b2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1864,13 +1864,21 @@ void show_free_areas(void) } } - printk("Active_anon:%lu active_file:%lu inactive_anon%lu\n" - " inactive_file:%lu dirty:%lu writeback:%lu unstable:%lu\n" + printk("Active_anon:%lu active_file:%lu inactive_anon:%lu\n" + " inactive_file:%lu" +//TODO: check/adjust line lengths +#ifdef 
CONFIG_UNEVICTABLE_LRU + " unevictable:%lu" +#endif + " dirty:%lu writeback:%lu unstable:%lu\n" " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), global_page_state(NR_ACTIVE_FILE), global_page_state(NR_INACTIVE_ANON), global_page_state(NR_INACTIVE_FILE), +#ifdef CONFIG_UNEVICTABLE_LRU + global_page_state(NR_UNEVICTABLE), +#endif global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), @@ -1897,6 +1905,9 @@ void show_free_areas(void) " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" +#ifdef CONFIG_UNEVICTABLE_LRU + " unevictable:%lukB" +#endif " present:%lukB" " pages_scanned:%lu" " all_unreclaimable? %s" @@ -1910,6 +1921,9 @@ void show_free_areas(void) K(zone_page_state(zone, NR_INACTIVE_ANON)), K(zone_page_state(zone, NR_ACTIVE_FILE)), K(zone_page_state(zone, NR_INACTIVE_FILE)), +#ifdef CONFIG_UNEVICTABLE_LRU + K(zone_page_state(zone, NR_UNEVICTABLE)), +#endif K(zone->present_pages), zone->pages_scanned, (zone_is_all_unreclaimable(zone) ? "yes" : "no") diff --git a/mm/vmstat.c b/mm/vmstat.c index 6cb08cdd4f0..6db2f631931 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -623,6 +623,9 @@ static const char * const vmstat_text[] = { "nr_active_anon", "nr_inactive_file", "nr_active_file", +#ifdef CONFIG_UNEVICTABLE_LRU + "nr_unevictable", +#endif "nr_anon_pages", "nr_mapped", "nr_file_pages", -- cgit v1.2.3-70-g09d2 From 5344b7e648980cc2ca613ec03a56a8222ff48820 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:51 -0700 Subject: vmstat: mlocked pages statistics Add NR_MLOCK zone page state, which provides a (conservative) count of mlocked pages (actually, the number of mlocked pages moved off the LRU). Reworked by lts to fit in with the modified mlock page support in the Reclaim Scalability series. 
[kosaki.motohiro@jp.fujitsu.com: fix incorrect Mlocked field of /proc/meminfo] [lee.schermerhorn@hp.com: mlocked-pages: add event counting with statistics] Signed-off-by: Nick Piggin Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 4 +++- fs/proc/proc_misc.c | 2 ++ include/linux/mmzone.h | 2 ++ include/linux/vmstat.h | 4 ++++ mm/internal.h | 16 +++++++++++++--- mm/mlock.c | 41 ++++++++++++++++++++++++++++++++++++----- mm/vmstat.c | 5 +++++ 7 files changed, 65 insertions(+), 9 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 11a9a05cf55..fb45d88a244 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -71,7 +71,8 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d Active(file): %8lu kB\n" "Node %d Inactive(file): %8lu kB\n" #ifdef CONFIG_UNEVICTABLE_LRU - "Node %d Noreclaim: %8lu kB\n" + "Node %d Unevictable: %8lu kB\n" + "Node %d Mlocked: %8lu kB\n" #endif #ifdef CONFIG_HIGHMEM "Node %d HighTotal: %8lu kB\n" @@ -104,6 +105,7 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(node_page_state(nid, NR_INACTIVE_FILE)), #ifdef CONFIG_UNEVICTABLE_LRU nid, K(node_page_state(nid, NR_UNEVICTABLE)), + nid, K(node_page_state(nid, NR_MLOCK)), #endif #ifdef CONFIG_HIGHMEM nid, K(i.totalhigh), diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 6dd60eaea99..61b25f4eabe 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -176,6 +176,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "Inactive(file): %8lu kB\n" #ifdef CONFIG_UNEVICTABLE_LRU "Unevictable: %8lu kB\n" + "Mlocked: %8lu kB\n" #endif #ifdef CONFIG_HIGHMEM "HighTotal: %8lu kB\n" @@ -217,6 +218,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(pages[LRU_INACTIVE_FILE]), #ifdef CONFIG_UNEVICTABLE_LRU K(pages[LRU_UNEVICTABLE]), + 
K(global_page_state(NR_MLOCK)), #endif #ifdef CONFIG_HIGHMEM K(i.totalhigh), diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d1f60d5fe2e..da2d053a95f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -88,8 +88,10 @@ enum zone_stat_item { NR_ACTIVE_FILE, /* " " " " " */ #ifdef CONFIG_UNEVICTABLE_LRU NR_UNEVICTABLE, /* " " " " " */ + NR_MLOCK, /* mlock()ed pages found and moved off LRU */ #else NR_UNEVICTABLE = NR_ACTIVE_FILE, /* avoid compiler errors in dead code */ + NR_MLOCK = NR_ACTIVE_FILE, #endif NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 135840cd7fe..05b805020be 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -45,6 +45,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, UNEVICTABLE_PGCULLED, /* culled to noreclaim list */ UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */ UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */ + UNEVICTABLE_PGMLOCKED, + UNEVICTABLE_PGMUNLOCKED, + UNEVICTABLE_PGCLEARED, /* on COW, page truncate */ + UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */ #endif NR_VM_EVENT_ITEMS }; diff --git a/mm/internal.h b/mm/internal.h index 48e32f79057..1cfbf2e2bc9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -101,7 +101,10 @@ static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) return 0; - SetPageMlocked(page); + if (!TestSetPageMlocked(page)) { + inc_zone_page_state(page, NR_MLOCK); + count_vm_event(UNEVICTABLE_PGMLOCKED); + } return 1; } @@ -128,12 +131,19 @@ static inline void clear_page_mlock(struct page *page) /* * mlock_migrate_page - called only from migrate_page_copy() to - * migrate the Mlocked page flag + * migrate the Mlocked page flag; update statistics. 
*/ static inline void mlock_migrate_page(struct page *newpage, struct page *page) { - if (TestClearPageMlocked(page)) + if (TestClearPageMlocked(page)) { + unsigned long flags; + + local_irq_save(flags); + __dec_zone_page_state(page, NR_MLOCK); SetPageMlocked(newpage); + __inc_zone_page_state(newpage, NR_MLOCK); + local_irq_restore(flags); + } } diff --git a/mm/mlock.c b/mm/mlock.c index 8b478350a2a..bce1b22c36c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -60,6 +60,8 @@ void __clear_page_mlock(struct page *page) return; } + dec_zone_page_state(page, NR_MLOCK); + count_vm_event(UNEVICTABLE_PGCLEARED); if (!isolate_lru_page(page)) { putback_lru_page(page); } else { @@ -69,6 +71,9 @@ void __clear_page_mlock(struct page *page) lru_add_drain_all(); if (!isolate_lru_page(page)) putback_lru_page(page); + else if (PageUnevictable(page)) + count_vm_event(UNEVICTABLE_PGSTRANDED); + } } @@ -80,8 +85,12 @@ void mlock_vma_page(struct page *page) { BUG_ON(!PageLocked(page)); - if (!TestSetPageMlocked(page) && !isolate_lru_page(page)) - putback_lru_page(page); + if (!TestSetPageMlocked(page)) { + inc_zone_page_state(page, NR_MLOCK); + count_vm_event(UNEVICTABLE_PGMLOCKED); + if (!isolate_lru_page(page)) + putback_lru_page(page); + } } /* @@ -106,9 +115,31 @@ static void munlock_vma_page(struct page *page) { BUG_ON(!PageLocked(page)); - if (TestClearPageMlocked(page) && !isolate_lru_page(page)) { - try_to_munlock(page); - putback_lru_page(page); + if (TestClearPageMlocked(page)) { + dec_zone_page_state(page, NR_MLOCK); + if (!isolate_lru_page(page)) { + int ret = try_to_munlock(page); + /* + * did try_to_munlock() succeed or punt? + */ + if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN) + count_vm_event(UNEVICTABLE_PGMUNLOCKED); + + putback_lru_page(page); + } else { + /* + * We lost the race. let try_to_unmap() deal + * with it. At least we get the page state and + * mlock stats right. However, page is still on + * the noreclaim list.
We'll fix that up when + * the page is eventually freed or we scan the + * noreclaim list. + */ + if (PageUnevictable(page)) + count_vm_event(UNEVICTABLE_PGSTRANDED); + else + count_vm_event(UNEVICTABLE_PGMUNLOCKED); + } } } diff --git a/mm/vmstat.c b/mm/vmstat.c index 6db2f631931..9e28abc0a0b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -625,6 +625,7 @@ static const char * const vmstat_text[] = { "nr_active_file", #ifdef CONFIG_UNEVICTABLE_LRU "nr_unevictable", + "nr_mlock", #endif "nr_anon_pages", "nr_mapped", @@ -684,6 +685,10 @@ static const char * const vmstat_text[] = { "unevictable_pgs_culled", "unevictable_pgs_scanned", "unevictable_pgs_rescued", + "unevictable_pgs_mlocked", + "unevictable_pgs_munlocked", + "unevictable_pgs_cleared", + "unevictable_pgs_stranded", #endif #endif }; -- cgit v1.2.3-70-g09d2 From af936a1606246a10c145feac3770f6287f483f02 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Sat, 18 Oct 2008 20:26:53 -0700 Subject: vmscan: unevictable LRU scan sysctl This patch adds a function to scan individual or all zones' unevictable lists and move any pages that have become evictable onto the respective zone's inactive list, where shrink_inactive_list() will deal with them. Adds a sysctl to scan all nodes, and per-node attributes to scan individual nodes' zones. Kosaki: If an evictable page is found on the unevictable LRU when writing to /proc/sys/vm/scan_unevictable_pages, print the filename and file offset of that page.
[akpm@linux-foundation.org: fix one CONFIG_MMU=n build error] [kosaki.motohiro@jp.fujitsu.com: adapt vmscan-unevictable-lru-scan-sysctl.patch to new sysfs API] Signed-off-by: Lee Schermerhorn Signed-off-by: Rik van Riel Signed-off-by: KOSAKI Motohiro Signed-off-by: KOSAKI Motohiro Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 5 ++ include/linux/rmap.h | 3 + include/linux/swap.h | 15 +++++ kernel/sysctl.c | 10 ++++ mm/rmap.c | 4 +- mm/vmscan.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 201 insertions(+), 2 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index fb45d88a244..f5207090885 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct sysdev_class node_class = { .name = "node", @@ -191,6 +192,8 @@ int register_node(struct node *node, int num, struct node *parent) sysdev_create_file(&node->sysdev, &attr_meminfo); sysdev_create_file(&node->sysdev, &attr_numastat); sysdev_create_file(&node->sysdev, &attr_distance); + + scan_unevictable_register_node(node); } return error; } @@ -210,6 +213,8 @@ void unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_numastat); sysdev_remove_file(&node->sysdev, &attr_distance); + scan_unevictable_unregister_node(node); + sysdev_unregister(&node->sysdev); } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 955667e6a52..1da48db8db0 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -75,6 +75,9 @@ void anon_vma_unlink(struct vm_area_struct *); void anon_vma_link(struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); +extern struct anon_vma *page_lock_anon_vma(struct page *page); +extern void page_unlock_anon_vma(struct anon_vma *anon_vma); + /* * rmap interfaces called when adding or removing pte of page */ diff --git a/include/linux/swap.h 
b/include/linux/swap.h index 07eda69412f..a3af95b2cb6 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -235,15 +236,29 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) #ifdef CONFIG_UNEVICTABLE_LRU extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); + +extern unsigned long scan_unevictable_pages; +extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +extern int scan_unevictable_register_node(struct node *node); +extern void scan_unevictable_unregister_node(struct node *node); #else static inline int page_evictable(struct page *page, struct vm_area_struct *vma) { return 1; } + static inline void scan_mapping_unevictable_pages(struct address_space *mapping) { } + +static inline int scan_unevictable_register_node(struct node *node) +{ + return 0; +} + +static inline void scan_unevictable_unregister_node(struct node *node) { } #endif extern int kswapd_run(int nid); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 617d41e4d6a..b3cc73931d1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -833,6 +833,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, + .procname = "scan_unevictable_pages", + .data = &scan_unevictable_pages, + .maxlen = sizeof(scan_unevictable_pages), + .mode = 0644, + .proc_handler = &scan_unevictable_handler, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/mm/rmap.c b/mm/rmap.c index 7e60df99018..7e90bebbeb6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -181,7 +181,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma 
rely on RCU to guard against the races. */ -static struct anon_vma *page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma; unsigned long anon_mapping; @@ -201,7 +201,7 @@ out: return NULL; } -static void page_unlock_anon_vma(struct anon_vma *anon_vma) +void page_unlock_anon_vma(struct anon_vma *anon_vma) { spin_unlock(&anon_vma->lock); rcu_read_unlock(); diff --git a/mm/vmscan.c b/mm/vmscan.c index e5aaaad159e..ca64e3e0c51 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -2363,6 +2364,39 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) return 1; } +static void show_page_path(struct page *page) +{ + char buf[256]; + if (page_is_file_cache(page)) { + struct address_space *mapping = page->mapping; + struct dentry *dentry; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + spin_lock(&mapping->i_mmap_lock); + dentry = d_find_alias(mapping->host); + printk(KERN_INFO "rescued: %s %lu\n", + dentry_path(dentry, buf, 256), pgoff); + spin_unlock(&mapping->i_mmap_lock); + } else { +#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) + struct anon_vma *anon_vma; + struct vm_area_struct *vma; + + anon_vma = page_lock_anon_vma(page); + if (!anon_vma) + return; + + list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + printk(KERN_INFO "rescued: anon %s\n", + vma->vm_mm->owner->comm); + break; + } + page_unlock_anon_vma(anon_vma); +#endif + } +} + + /** * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list * @page: page to check evictability and move to appropriate lru list @@ -2382,6 +2416,9 @@ retry: ClearPageUnevictable(page); if (page_evictable(page, NULL)) { enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); + + show_page_path(page); + __dec_zone_state(zone, NR_UNEVICTABLE); list_move(&page->lru, &zone->lru[l].list); __inc_zone_state(zone, 
NR_INACTIVE_ANON + l); @@ -2451,4 +2488,133 @@ void scan_mapping_unevictable_pages(struct address_space *mapping) } } + +/** + * scan_zone_unevictable_pages - check unevictable list for evictable pages + * @zone: zone of which to scan the unevictable list + * + * Scan @zone's unevictable LRU lists to check for pages that have become + * evictable. Move those that have to @zone's inactive list where they + * become candidates for reclaim, unless shrink_inactive_list() decides + * to reactivate them. Pages that are still unevictable are rotated + * back onto @zone's unevictable list. + */ +#define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */ +void scan_zone_unevictable_pages(struct zone *zone) +{ + struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list; + unsigned long scan; + unsigned long nr_to_scan = zone_page_state(zone, NR_UNEVICTABLE); + + while (nr_to_scan > 0) { + unsigned long batch_size = min(nr_to_scan, + SCAN_UNEVICTABLE_BATCH_SIZE); + + spin_lock_irq(&zone->lru_lock); + for (scan = 0; scan < batch_size; scan++) { + struct page *page = lru_to_page(l_unevictable); + + if (!trylock_page(page)) + continue; + + prefetchw_prev_lru_page(page, l_unevictable, flags); + + if (likely(PageLRU(page) && PageUnevictable(page))) + check_move_unevictable_page(page, zone); + + unlock_page(page); + } + spin_unlock_irq(&zone->lru_lock); + + nr_to_scan -= batch_size; + } +} + + +/** + * scan_all_zones_unevictable_pages - scan all unevictable lists for evictable pages + * + * A really big hammer: scan all zones' unevictable LRU lists to check for + * pages that have become evictable. Move those back to the zones' + * inactive list where they become candidates for reclaim. + * This occurs when, e.g., we have unswappable pages on the unevictable lists, + * and we add swap to the system. As such, it runs in the context of a task + * that has possibly/probably made some previously unevictable pages + * evictable.
+ */ +void scan_all_zones_unevictable_pages(void) +{ + struct zone *zone; + + for_each_zone(zone) { + scan_zone_unevictable_pages(zone); + } +} + +/* + * scan_unevictable_pages [vm] sysctl handler. On demand re-scan of + * all nodes' unevictable lists for evictable pages + */ +unsigned long scan_unevictable_pages; + +int scan_unevictable_handler(struct ctl_table *table, int write, + struct file *file, void __user *buffer, + size_t *length, loff_t *ppos) +{ + proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + + if (write && *(unsigned long *)table->data) + scan_all_zones_unevictable_pages(); + + scan_unevictable_pages = 0; + return 0; +} + +/* + * per node 'scan_unevictable_pages' attribute. On demand re-scan of + * a specified node's per zone unevictable lists for evictable pages. + */ + +static ssize_t read_scan_unevictable_node(struct sys_device *dev, + struct sysdev_attribute *attr, + char *buf) +{ + return sprintf(buf, "0\n"); /* always zero; should fit... */ +} + +static ssize_t write_scan_unevictable_node(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t count) +{ + struct zone *node_zones = NODE_DATA(dev->id)->node_zones; + struct zone *zone; + unsigned long res; + unsigned long req = strict_strtoul(buf, 10, &res); + + if (!req) + return 1; /* zero is no-op */ + + for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { + if (!populated_zone(zone)) + continue; + scan_zone_unevictable_pages(zone); + } + return 1; +} + + +static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR, + read_scan_unevictable_node, + write_scan_unevictable_node); + +int scan_unevictable_register_node(struct node *node) +{ + return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages); +} + +void scan_unevictable_unregister_node(struct node *node) +{ + sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages); +} + #endif -- cgit v1.2.3-70-g09d2