From 0014bd990e69063b0fb78940b35439d7980ce3ee Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:47 +0800 Subject: mm: export __get_user_pages In most cases, get_user_pages and get_user_pages_fast should be used to pin user pages in memory. But sometimes special flags other than FOLL_GET, FOLL_WRITE and FOLL_FORCE are needed; for example, in the following patch, KVM needs FOLL_HWPOISON. To support these users, __get_user_pages is exported directly. There are some symbol name conflicts in the infiniband driver; these are fixed too. Signed-off-by: Huang Ying CC: Andrew Morton CC: Michel Lespinasse CC: Roland Dreier CC: Ralph Campbell Signed-off-by: Marcelo Tosatti --- mm/internal.h | 5 ----- mm/memory.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/internal.h b/mm/internal.h index 69488205723..3438dd43a06 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -245,11 +245,6 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, } #endif /* CONFIG_SPARSEMEM */ -int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, unsigned int foll_flags, - struct page **pages, struct vm_area_struct **vmas, - int *nonblocking); - #define ZONE_RECLAIM_NOSCAN -2 #define ZONE_RECLAIM_FULL -1 #define ZONE_RECLAIM_SOME 0 diff --git a/mm/memory.c b/mm/memory.c index 5823698c2b7..806a37ec71b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1410,6 +1410,55 @@ no_page_table: return page; } +/** + * __get_user_pages() - pin user pages in memory + * @tsk: task_struct of target task + * @mm: mm_struct of target mm + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @gup_flags: flags modifying pin behaviour + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. Or NULL, if caller + * only intends to ensure the pages are faulted in. 
+ * @vmas: array of pointers to vmas corresponding to each page. + * Or NULL if the caller does not require them. + * @nonblocking: whether waiting for disk IO or mmap_sem contention + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. Each page returned must be released + * with a put_page() call when it is finished with. vmas will only + * remain valid while mmap_sem is held. + * + * Must be called with mmap_sem held for read or write. + * + * __get_user_pages walks a process's page tables and takes a reference to + * each struct page that each user address corresponds to at a given + * instant. That is, it takes the page that would be accessed if a user + * thread accesses the given user virtual address at that instant. + * + * This does not guarantee that the page exists in the user mappings when + * __get_user_pages returns, and there may even be a completely different + * page there in some cases (eg. if mmapped pagecache has been invalidated + * and subsequently re faulted). However it does guarantee that the page + * won't be freed completely. And mostly callers simply care that the page + * contains data that was valid *at some point in time*. Typically, an IO + * or similar operation cannot guarantee anything stronger anyway because + * locks can't be held over the syscall boundary. + * + * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If + * the page is written to, set_page_dirty (or set_page_dirty_lock, as + * appropriate) must be called after the page is finished with, and + * before put_page is called. + * + * If @nonblocking != NULL, __get_user_pages will not wait for disk IO + * or mmap_sem contention, and if waiting is needed to pin all pages, + * *@nonblocking will be set to 0. + * + * In most cases, get_user_pages or get_user_pages_fast should be used + * instead of __get_user_pages. 
__get_user_pages should be used only if + * you need some special @gup_flags. + */ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, @@ -1578,6 +1627,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } while (nr_pages); return i; } +EXPORT_SYMBOL(__get_user_pages); /** * get_user_pages() - pin user pages in memory -- cgit v1.2.3-70-g09d2 From 69ebb83e13e514222b0ae4f8bd813a17679ed876 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:48 +0800 Subject: mm: make __get_user_pages return -EHWPOISON for HWPOISON page optionally Make __get_user_pages return -EHWPOISON for a HWPOISON page only if FOLL_HWPOISON is specified. With this patch, interested callers can distinguish HWPOISON pages from general FAULT pages, while other callers will still get -EFAULT for all these pages, so the user space interface need not be changed. This feature is needed by KVM, where a UCR MCE should be relayed to the guest for a HWPOISON page, while instruction emulation and MMIO will be tried for a general FAULT page. The idea comes from Andrew Morton. 
Signed-off-by: Huang Ying Cc: Andrew Morton Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/alpha/include/asm/errno.h | 2 ++ arch/mips/include/asm/errno.h | 2 ++ arch/parisc/include/asm/errno.h | 2 ++ arch/sparc/include/asm/errno.h | 2 ++ include/asm-generic/errno.h | 2 ++ include/linux/mm.h | 1 + mm/memory.c | 13 ++++++++++--- 7 files changed, 21 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/arch/alpha/include/asm/errno.h b/arch/alpha/include/asm/errno.h index 98099bda937..e5f29ca2818 100644 --- a/arch/alpha/include/asm/errno.h +++ b/arch/alpha/include/asm/errno.h @@ -122,4 +122,6 @@ #define ERFKILL 138 /* Operation not possible due to RF-kill */ +#define EHWPOISON 139 /* Memory page has hardware error */ + #endif diff --git a/arch/mips/include/asm/errno.h b/arch/mips/include/asm/errno.h index a0efc73819e..6dcd3583ed0 100644 --- a/arch/mips/include/asm/errno.h +++ b/arch/mips/include/asm/errno.h @@ -121,6 +121,8 @@ #define ERFKILL 167 /* Operation not possible due to RF-kill */ +#define EHWPOISON 168 /* Memory page has hardware error */ + #define EDQUOT 1133 /* Quota exceeded */ #ifdef __KERNEL__ diff --git a/arch/parisc/include/asm/errno.h b/arch/parisc/include/asm/errno.h index 9992abdd782..135ad6047e5 100644 --- a/arch/parisc/include/asm/errno.h +++ b/arch/parisc/include/asm/errno.h @@ -122,4 +122,6 @@ #define ERFKILL 256 /* Operation not possible due to RF-kill */ +#define EHWPOISON 257 /* Memory page has hardware error */ + #endif diff --git a/arch/sparc/include/asm/errno.h b/arch/sparc/include/asm/errno.h index 4e2bc490d71..c351aba997b 100644 --- a/arch/sparc/include/asm/errno.h +++ b/arch/sparc/include/asm/errno.h @@ -112,4 +112,6 @@ #define ERFKILL 134 /* Operation not possible due to RF-kill */ +#define EHWPOISON 135 /* Memory page has hardware error */ + #endif diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h index 28cc03bf19e..a1331ce5044 100644 --- a/include/asm-generic/errno.h +++ 
b/include/asm-generic/errno.h @@ -108,4 +108,6 @@ #define ERFKILL 132 /* Operation not possible due to RF-kill */ +#define EHWPOISON 133 /* Memory page has hardware error */ + #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 46150c66318..a77c82c56e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1532,6 +1532,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_MLOCK 0x40 /* mark page as mlocked */ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ +#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/memory.c b/mm/memory.c index 806a37ec71b..346ee7e041f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1576,9 +1576,16 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) return i ? i : -ENOMEM; - if (ret & - (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE| - VM_FAULT_SIGBUS)) + if (ret & (VM_FAULT_HWPOISON | + VM_FAULT_HWPOISON_LARGE)) { + if (i) + return i; + else if (gup_flags & FOLL_HWPOISON) + return -EHWPOISON; + else + return -EFAULT; + } + if (ret & VM_FAULT_SIGBUS) return i ? i : -EFAULT; BUG(); } -- cgit v1.2.3-70-g09d2 From f58c9df78c0360f0eb3852b9cc3a61e689bc2dd1 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:49 +0800 Subject: mm: remove is_hwpoison_address Unused. 
Signed-off-by: Huang Ying Signed-off-by: Marcelo Tosatti --- include/linux/mm.h | 8 -------- mm/memory-failure.c | 32 -------------------------------- 2 files changed, 40 deletions(-) (limited to 'mm') diff --git a/include/linux/mm.h b/include/linux/mm.h index a77c82c56e0..78219887308 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1625,14 +1625,6 @@ extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t mce_bad_pages; extern int soft_offline_page(struct page *page, int flags); -#ifdef CONFIG_MEMORY_FAILURE -int is_hwpoison_address(unsigned long addr); -#else -static inline int is_hwpoison_address(unsigned long addr) -{ - return 0; -} -#endif extern void dump_page(struct page *page); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 0207c2f6f8b..99ccb447262 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1487,35 +1487,3 @@ done: /* keep elevated page count for bad page */ return ret; } - -/* - * The caller must hold current->mm->mmap_sem in read mode. - */ -int is_hwpoison_address(unsigned long addr) -{ - pgd_t *pgdp; - pud_t pud, *pudp; - pmd_t pmd, *pmdp; - pte_t pte, *ptep; - swp_entry_t entry; - - pgdp = pgd_offset(current->mm, addr); - if (!pgd_present(*pgdp)) - return 0; - pudp = pud_offset(pgdp, addr); - pud = *pudp; - if (!pud_present(pud) || pud_large(pud)) - return 0; - pmdp = pmd_offset(pudp, addr); - pmd = *pmdp; - if (!pmd_present(pmd) || pmd_large(pmd)) - return 0; - ptep = pte_offset_map(pmdp, addr); - pte = *ptep; - pte_unmap(ptep); - if (!is_swap_pte(pte)) - return 0; - entry = pte_to_swp_entry(pte); - return is_hwpoison_entry(entry); -} -EXPORT_SYMBOL_GPL(is_hwpoison_address); -- cgit v1.2.3-70-g09d2