From 1c3aff1ceec2cc86810e2690e67873ff0c505862 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:24 -0700 Subject: mm: remove unused GUP flags GUP_FLAGS_IGNORE_VMA_PERMISSIONS and GUP_FLAGS_IGNORE_SIGKILL were flags added solely to prevent __get_user_pages() from doing some of what it usually does, in the munlock case: we can now remove them. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index f290c4db528..166765cd58d 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -250,10 +250,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, } #endif /* CONFIG_SPARSEMEM */ -#define GUP_FLAGS_WRITE 0x1 -#define GUP_FLAGS_FORCE 0x2 -#define GUP_FLAGS_IGNORE_VMA_PERMISSIONS 0x4 -#define GUP_FLAGS_IGNORE_SIGKILL 0x8 +#define GUP_FLAGS_WRITE 0x01 +#define GUP_FLAGS_FORCE 0x02 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int flags, -- cgit v1.2.3-70-g09d2 From 8e4b9a60718970bbc02dfd3abd0b956ab65af231 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:26 -0700 Subject: mm: FOLL_DUMP replace FOLL_ANON The "FOLL_ANON optimization" and its use_zero_page() test have caused confusion and bugs: why does it test VM_SHARED? for the very good but unsatisfying reason that VMware crashed without. As we look to maybe reinstating anonymous use of the ZERO_PAGE, we need to sort this out. Easily done: it's silly for __get_user_pages() and follow_page() to be guessing whether it's safe to assume that they're being used for a coredump (which can take a shortcut snapshot where other uses must handle a fault) - just tell them with GUP_FLAGS_DUMP and FOLL_DUMP. get_dump_page() doesn't even want a ZERO_PAGE: an error suits fine. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Acked-by: Mel Gorman Reviewed-by: Minchan Kim Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/internal.h | 1 + mm/memory.c | 43 ++++++++++++------------------------------- 3 files changed, 14 insertions(+), 32 deletions(-) (limited to 'mm/internal.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index e41795bba95..45ee5b5a343 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1231,7 +1231,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/internal.h b/mm/internal.h index 166765cd58d..d41475078b2 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -252,6 +252,7 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, #define GUP_FLAGS_WRITE 0x01 #define GUP_FLAGS_FORCE 0x02 +#define GUP_FLAGS_DUMP 0x04 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int flags, diff --git a/mm/memory.c b/mm/memory.c index a8430ff1383..532a55bce6a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1174,41 +1174,22 @@ no_page: pte_unmap_unlock(ptep, ptl); if (!pte_none(pte)) return page; - /* Fall through to ZERO_PAGE handling */ + no_page_table: /* * When core dumping an enormous anonymous area that nobody - * has touched so far, we don't want to allocate page tables. + * has touched so far, we don't want to allocate unnecessary pages or + * page tables. Return error instead of NULL to skip handle_mm_fault, + * then get_dump_page() will return NULL to leave a hole in the dump. + * But we can only make this optimization where a hole would surely + * be zero-filled if handle_mm_fault() actually did handle it. */ - if (flags & FOLL_ANON) { - page = ZERO_PAGE(0); - if (flags & FOLL_GET) - get_page(page); - BUG_ON(flags & FOLL_WRITE); - } + if ((flags & FOLL_DUMP) && + (!vma->vm_ops || !vma->vm_ops->fault)) + return ERR_PTR(-EFAULT); return page; } -/* Can we do the FOLL_ANON optimization? */ -static inline int use_zero_page(struct vm_area_struct *vma) -{ - /* - * We don't want to optimize FOLL_ANON for make_pages_present() - * when it tries to page in a VM_LOCKED region. As to VM_SHARED, - * we want to get the page from the page tables to make sure - * that we serialize and update with any other user of that - * mapping. - */ - if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) - return 0; - /* - * And if we have a fault routine, it's not an anonymous region. - */ - return !vma->vm_ops || !vma->vm_ops->fault; -} - - - int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int flags, struct page **pages, struct vm_area_struct **vmas) @@ -1288,8 +1269,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, foll_flags = FOLL_TOUCH; if (pages) foll_flags |= FOLL_GET; - if (!write && use_zero_page(vma)) - foll_flags |= FOLL_ANON; + if (flags & GUP_FLAGS_DUMP) + foll_flags |= FOLL_DUMP; do { struct page *page; @@ -1446,7 +1427,7 @@ struct page *get_dump_page(unsigned long addr) struct page *page; if (__get_user_pages(current, current->mm, addr, 1, - GUP_FLAGS_FORCE, &page, &vma) < 1) + GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1) return NULL; if (page == ZERO_PAGE(0)) { page_cache_release(page); -- cgit v1.2.3-70-g09d2 From 58fa879e1e640a1856f736b418984ebeccee1c95 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:31 -0700 Subject: mm: FOLL flags for GUP flags __get_user_pages() has been taking its own GUP flags, then processing them into FOLL flags for follow_page(). Though oddly named, the FOLL flags are more widely used, so pass them to __get_user_pages() now. Sorry, VM flags, VM_FAULT flags and FAULT_FLAGs are still distinct. (The patch to __get_user_pages() looks peculiar, with both gup_flags and foll_flags: the gup_flags remain constant; but as before there's an exceptional case, out of scope of the patch, in which foll_flags per page have FOLL_WRITE masked off.) Signed-off-by: Hugh Dickins Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/internal.h | 6 +----- mm/memory.c | 44 +++++++++++++++++++------------------------- mm/mlock.c | 4 ++-- mm/nommu.c | 16 ++++++++-------- 5 files changed, 31 insertions(+), 40 deletions(-) (limited to 'mm/internal.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 45ee5b5a343..5409eced7aa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1232,6 +1232,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/internal.h b/mm/internal.h index d41475078b2..75596574911 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -250,12 +250,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, } #endif /* CONFIG_SPARSEMEM */ -#define GUP_FLAGS_WRITE 0x01 -#define GUP_FLAGS_FORCE 0x02 -#define GUP_FLAGS_DUMP 0x04 - int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int flags, + unsigned long start, int len, unsigned int foll_flags, struct page **pages, struct vm_area_struct **vmas); #define ZONE_RECLAIM_NOSCAN -2 diff --git a/mm/memory.c b/mm/memory.c index c8b5b9435a9..5c694f2b9c1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1209,27 +1209,29 @@ no_page_table: } int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, int flags, + unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { int i; - unsigned int vm_flags = 0; - int write = !!(flags & GUP_FLAGS_WRITE); - int force = !!(flags & GUP_FLAGS_FORCE); + unsigned long vm_flags; if (nr_pages <= 0) return 0; + + VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); + /* * Require read or write permissions. - * If 'force' is set, we only require the "MAY" flags. + * If FOLL_FORCE is set, we only require the "MAY" flags. */ - vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + vm_flags = (gup_flags & FOLL_WRITE) ? + (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); + vm_flags &= (gup_flags & FOLL_FORCE) ? + (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); i = 0; do { struct vm_area_struct *vma; - unsigned int foll_flags; vma = find_extend_vma(mm, start); if (!vma && in_gate_area(tsk, start)) { @@ -1241,7 +1243,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, pte_t *pte; /* user gate pages are read-only */ - if (write) + if (gup_flags & FOLL_WRITE) return i ? : -EFAULT; if (pg > TASK_SIZE) pgd = pgd_offset_k(pg); @@ -1278,22 +1280,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, !(vm_flags & vma->vm_flags)) return i ? : -EFAULT; - foll_flags = FOLL_TOUCH; - if (pages) - foll_flags |= FOLL_GET; - if (flags & GUP_FLAGS_DUMP) - foll_flags |= FOLL_DUMP; - if (write) - foll_flags |= FOLL_WRITE; - if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &nr_pages, i, foll_flags); + &start, &nr_pages, i, gup_flags); continue; } do { struct page *page; + unsigned int foll_flags = gup_flags; /* * If we have a pending SIGKILL, don't keep faulting @@ -1302,9 +1297,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(fatal_signal_pending(current))) return i ? i : -ERESTARTSYS; - if (write) - foll_flags |= FOLL_WRITE; - cond_resched(); while (!(page = follow_page(vma, start, foll_flags))) { int ret; @@ -1415,12 +1407,14 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { - int flags = 0; + int flags = FOLL_TOUCH; + if (pages) + flags |= FOLL_GET; if (write) - flags |= GUP_FLAGS_WRITE; + flags |= FOLL_WRITE; if (force) - flags |= GUP_FLAGS_FORCE; + flags |= FOLL_FORCE; return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); } @@ -1447,7 +1441,7 @@ struct page *get_dump_page(unsigned long addr) struct page *page; if (__get_user_pages(current, current->mm, addr, 1, - GUP_FLAGS_FORCE | GUP_FLAGS_DUMP, &page, &vma) < 1) + FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1) return NULL; if (page == ZERO_PAGE(0)) { page_cache_release(page); diff --git a/mm/mlock.c b/mm/mlock.c index e13918d4fc4..22041aa9f5c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -166,9 +166,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, VM_BUG_ON(end > vma->vm_end); VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - gup_flags = 0; + gup_flags = FOLL_TOUCH | FOLL_GET; if (vma->vm_flags & VM_WRITE) - gup_flags = GUP_FLAGS_WRITE; + gup_flags |= FOLL_WRITE; while (nr_pages > 0) { int i; diff --git a/mm/nommu.c b/mm/nommu.c index 386443e9d2c..2d02ca17ce1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -168,20 +168,20 @@ unsigned int kobjsize(const void *objp) } int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int nr_pages, int flags, + unsigned long start, int nr_pages, int foll_flags, struct page **pages, struct vm_area_struct **vmas) { struct vm_area_struct *vma; unsigned long vm_flags; int i; - int write = !!(flags & GUP_FLAGS_WRITE); - int force = !!(flags & GUP_FLAGS_FORCE); /* calculate required read or write permissions. - * - if 'force' is set, we only require the "MAY" flags. + * If FOLL_FORCE is set, we only require the "MAY" flags. */ - vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + vm_flags = (foll_flags & FOLL_WRITE) ? + (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); + vm_flags &= (foll_flags & FOLL_FORCE) ? + (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); for (i = 0; i < nr_pages; i++) { vma = find_vma(mm, start); @@ -223,9 +223,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int flags = 0; if (write) - flags |= GUP_FLAGS_WRITE; + flags |= FOLL_WRITE; if (force) - flags |= GUP_FLAGS_FORCE; + flags |= FOLL_FORCE; return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); } -- cgit v1.2.3-70-g09d2 From 03f6462a3ae78f36eb1f0ee8b4d5ae2f7859c1d5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Sep 2009 17:03:35 -0700 Subject: mm: move highest_memmap_pfn Move highest_memmap_pfn __read_mostly from page_alloc.c next to zero_pfn __read_mostly in memory.c: to help them share a cacheline, since they're very often tested together in vm_normal_page(). Signed-off-by: Hugh Dickins Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 3 ++- mm/memory.c | 1 + mm/page_alloc.c | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/internal.h') diff --git a/mm/internal.h b/mm/internal.h index 75596574911..22ec8d2b0fb 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -37,6 +37,8 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } +extern unsigned long highest_memmap_pfn; + /* * in mm/vmscan.c: */ @@ -46,7 +48,6 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ -extern unsigned long highest_memmap_pfn; extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); diff --git a/mm/memory.c b/mm/memory.c index 9bdbd10cb41..b1443ac07c0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -109,6 +109,7 @@ static int __init disable_randmaps(char *s) __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; +unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6877e22e3aa..5717f27a070 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -72,7 +72,6 @@ EXPORT_SYMBOL(node_states); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; -unsigned long highest_memmap_pfn __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -- cgit v1.2.3-70-g09d2