From b291f000393f5a0b679012b39d79fbc85c018233 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 18 Oct 2008 20:26:44 -0700 Subject: mlock: mlocked pages are unevictable Make sure that mlocked pages also live on the unevictable LRU, so kswapd will not scan them over and over again. This is achieved through various strategies: 1) add yet another page flag--PG_mlocked--to indicate that the page is locked for efficient testing in vmscan and, optionally, fault path. This allows early culling of unevictable pages, preventing them from getting to page_referenced()/try_to_unmap(). Also allows separate accounting of mlock'd pages, as Nick's original patch did. Note: Nick's original mlock patch used a PG_mlocked flag. I had removed this in favor of the PG_unevictable flag + an mlock_count [new page struct member]. I restored the PG_mlocked flag to eliminate the new count field. 2) add the mlock/unevictable infrastructure to mm/mlock.c, with internal APIs in mm/internal.h. This is a rework of Nick's original patch to these files, taking into account that mlocked pages are now kept on the unevictable LRU list. 3) update vmscan.c:page_evictable() to check PageMlocked() and, if vma passed in, the vm_flags. Note that the vma will only be passed in for new pages in the fault path; and then only if the "cull unevictable pages in fault path" patch is included. 4) add try_to_munlock() to rmap.c to walk a page's rmap and ClearPageMlocked() if no other vmas have it mlocked. Reuses as much of try_to_unmap() as possible. This effectively replaces the use of one of the lru list links as an mlock count. If this mechanism lets pages in mlocked vmas leak through w/o PG_mlocked set [I don't know that it does], we should catch them later in try_to_unmap(). One hopes this will be rare, as it will be relatively expensive. 
Original mm/internal.h, mm/rmap.c and mm/mlock.c changes: Signed-off-by: Nick Piggin splitlru: introduce __get_user_pages(): New munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS, because the current get_user_pages() can't grab PROT_NONE pages and therefore PROT_NONE pages can't be munlocked. [akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch] [akpm@linux-foundation.org: untangle patch interdependencies] [akpm@linux-foundation.org: fix things after out-of-order merging] [hugh@veritas.com: fix page-flags mess] [lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm'] [kosaki.motohiro@jp.fujitsu.com: build fix] [kosaki.motohiro@jp.fujitsu.com: fix truncate race and several comments] [kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()] Signed-off-by: KOSAKI Motohiro Signed-off-by: Rik van Riel Signed-off-by: Lee Schermerhorn Cc: Nick Piggin Cc: Dave Hansen Cc: Matt Mackall Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ include/linux/page-flags.h | 19 ++++++++++++++++--- include/linux/rmap.h | 14 ++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 40236290e2a..ffee2f74341 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -131,6 +131,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) +/* + * special vmas that are non-mergable, non-mlock()able + */ +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ec1a1baad34..b12f93a3c34 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -96,6 +96,7 @@ enum pageflags { PG_swapbacked, /* Page is backed by RAM/swap */ #ifdef CONFIG_UNEVICTABLE_LRU PG_unevictable, /* Page is "unevictable" */ + PG_mlocked, /* Page is vma mlocked */ #endif #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR PG_uncached, /* Page has been mapped as uncached */ @@ -232,7 +233,17 @@ PAGEFLAG_FALSE(SwapCache) #ifdef CONFIG_UNEVICTABLE_LRU PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) TESTCLEARFLAG(Unevictable, unevictable) + +#define MLOCK_PAGES 1 +PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) + TESTSCFLAG(Mlocked, mlocked) + #else + +#define MLOCK_PAGES 0 +PAGEFLAG_FALSE(Mlocked) + SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) + PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable) SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable) __CLEARPAGEFLAG_NOOP(Unevictable) @@ -354,15 +365,17 @@ static inline void __ClearPageTail(struct page *page) #endif /* !PAGEFLAGS_EXTENDED */ #ifdef CONFIG_UNEVICTABLE_LRU -#define __PG_UNEVICTABLE (1 << PG_unevictable) +#define __PG_UNEVICTABLE (1 << PG_unevictable) +#define __PG_MLOCKED (1 << PG_mlocked) #else -#define __PG_UNEVICTABLE 0 +#define __PG_UNEVICTABLE 0 +#define __PG_MLOCKED 0 #endif #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 1 << PG_buddy | 1 << PG_writeback | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - __PG_UNEVICTABLE) + __PG_UNEVICTABLE | __PG_MLOCKED) /* * Flags checked in bad_page(). 
Pages on the free list should not have diff --git a/include/linux/rmap.h b/include/linux/rmap.h index fed6f5e0b41..955667e6a52 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -117,6 +117,19 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); */ int page_mkclean(struct page *); +#ifdef CONFIG_UNEVICTABLE_LRU +/* + * called in munlock()/munmap() path to check for other vmas holding + * the page mlocked. + */ +int try_to_munlock(struct page *); +#else +static inline int try_to_munlock(struct page *page) +{ + return 0; /* a.k.a. SWAP_SUCCESS */ +} +#endif + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -140,5 +153,6 @@ static inline int page_mkclean(struct page *page) #define SWAP_SUCCESS 0 #define SWAP_AGAIN 1 #define SWAP_FAIL 2 +#define SWAP_MLOCK 3 #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3-70-g09d2