From ddd588b5dd55f14320379961e47683db4e4c1d90 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 22 Mar 2011 16:30:46 -0700 Subject: oom: suppress nodes that are not allowed from meminfo on oom kill The oom killer is extremely verbose for machines with a large number of cpus and/or nodes. This verbosity can often be harmful if it causes other important messages to be scrolled from the kernel log and incurs a signicant time delay, specifically for kernels with CONFIG_NODES_SHIFT > 8. This patch causes only memory information to be displayed for nodes that are allowed by current's cpuset when dumping the VM state. Information for all other nodes is irrelevant to the oom condition; we don't care if there's an abundance of memory elsewhere if we can't access it. This only affects the behavior of dumping memory information when an oom is triggered. Other dumps, such as for sysrq+m, still display the unfiltered form when using the existing show_mem() interface. Additionally, the per-cpu pageset statistics are extremely verbose in oom killer output, so it is now suppressed. This removes nodes_weight(current->mems_allowed) * (1 + nr_cpus) lines from the oom killer output. Callers may use __show_mem(SHOW_MEM_FILTER_NODES) to filter disallowed nodes. Signed-off-by: David Rientjes Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 581703d86fb..1f82adc8535 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -859,7 +859,14 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) +/* + * Flags passed to __show_mem() and __show_free_areas() to suppress output in + * various contexts. + */ +#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ + extern void show_free_areas(void); +extern void __show_free_areas(unsigned int flags); int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); @@ -1348,6 +1355,7 @@ extern void calculate_zone_inactive_ratio(struct zone *zone); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(void); +extern void __show_mem(unsigned int flags); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -- cgit v1.2.3-70-g09d2 From 318b275fbca1ab9ec0862de71420e0e92c3d1aa7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 22 Mar 2011 16:30:51 -0700 Subject: mm: allow GUP to fail instead of waiting on a page GUP user may want to try to acquire a reference to a page if it is already in memory, but not if IO, to bring it in, is needed. For example KVM may tell vcpu to schedule another guest process if current one is trying to access swapped out page. Meanwhile, the page will be swapped in and the guest process, that depends on it, will be able to run again. This patch adds FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and FOLL_NOWAIT follow_page flags. FAULT_FLAG_RETRY_NOWAIT, when used in conjunction with VM_FAULT_ALLOW_RETRY, indicates to handle_mm_fault that it shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY instead. [akpm@linux-foundation.org: improve FOLL_NOWAIT comment] Signed-off-by: Gleb Natapov Cc: Linus Torvalds Cc: Hugh Dickins Acked-by: Rik van Riel Cc: Michel Lespinasse Cc: Avi Kivity Cc: Marcelo Tosatti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ mm/filemap.c | 6 ++++-- mm/memory.c | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1f82adc8535..901435e3a9a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -151,6 +151,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ +#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -1545,6 +1546,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO + * and return without waiting upon it */ #define FOLL_MLOCK 0x40 /* mark page as mlocked */ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ diff --git a/mm/filemap.c b/mm/filemap.c index 83a45d35468..312b6eb7843 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -621,8 +621,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, __lock_page(page); return 1; } else { - up_read(&mm->mmap_sem); - wait_on_page_locked(page); + if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) { + up_read(&mm->mmap_sem); + wait_on_page_locked(page); + } return 0; } } diff --git a/mm/memory.c b/mm/memory.c index e48945ab362..615be5127ce 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1569,6 +1569,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) fault_flags |= FAULT_FLAG_ALLOW_RETRY; + if (foll_flags & FOLL_NOWAIT) + fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT); ret = handle_mm_fault(mm, vma, start, fault_flags); @@ -1595,7 +1597,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, tsk->min_flt++; if (ret & VM_FAULT_RETRY) { - *nonblocking = 0; + if (nonblocking) + *nonblocking = 0; return i; } -- cgit v1.2.3-70-g09d2 From 033193275b3ffcfe7f3fde7b569f3d207f6cd6a0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Mar 2011 16:32:56 -0700 Subject: pagewalk: only split huge pages when necessary Right now, if a mm_walk has either ->pte_entry or ->pmd_entry set, it will unconditionally split any transparent huge pages it runs in to. In practice, that means that anyone doing a cat /proc/$pid/smaps will unconditionally break down every huge page in the process and depend on khugepaged to re-collapse it later. This is fairly suboptimal. This patch changes that behavior. It teaches each ->pmd_entry handler (there are five) that they must break down the THPs themselves. Also, the _generic_ code will never break down a THP unless a ->pte_entry handler is actually set. This means that the ->pmd_entry handlers can now choose to deal with THPs without breaking them down. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Dave Hansen Acked-by: Mel Gorman Acked-by: David Rientjes Reviewed-by: Eric B Munson Tested-by: Eric B Munson Cc: Michael J Wolf Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Matt Mackall Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 6 ++++++ include/linux/mm.h | 3 +++ mm/memcontrol.c | 5 +++-- mm/pagewalk.c | 24 ++++++++++++++++++++---- 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/linux/mm.h') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 60b914860f8..78fd3621f56 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -343,6 +343,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct page *page; int mapcount; + split_huge_page_pmd(walk->mm, pmd); + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; @@ -467,6 +469,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; + split_huge_page_pmd(walk->mm, pmd); + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; @@ -623,6 +627,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; int err = 0; + split_huge_page_pmd(walk->mm, pmd); + /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 901435e3a9a..294104e0891 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -914,6 +914,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlb, * @pgd_entry: if set, called for each non-empty PGD (top-level) entry * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry + * this handler is required to be able to handle + * pmd_trans_huge() pmds. They may simply choose to + * split_huge_page() instead of handling it explicitly. * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9e0f05efd11..e1ee6ad9c97 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4763,7 +4763,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, pte_t *pte; spinlock_t *ptl; - VM_BUG_ON(pmd_trans_huge(*pmd)); + split_huge_page_pmd(walk->mm, pmd); + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) if (is_target_pte_for_mc(vma, addr, *pte, NULL)) @@ -4925,8 +4926,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, pte_t *pte; spinlock_t *ptl; + split_huge_page_pmd(walk->mm, pmd); retry: - VM_BUG_ON(pmd_trans_huge(*pmd)); pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; addr += PAGE_SIZE) { pte_t ptent = *(pte++); diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 7cfa6ae0230..c3450d53361 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -33,19 +33,35 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, pmd = pmd_offset(pud, addr); do { +again: next = pmd_addr_end(addr, end); - split_huge_page_pmd(walk->mm, pmd); - if (pmd_none_or_clear_bad(pmd)) { + if (pmd_none(*pmd)) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) break; continue; } + /* + * This implies that each ->pmd_entry() handler + * needs to know about pmd_trans_huge() pmds + */ if (walk->pmd_entry) err = walk->pmd_entry(pmd, addr, next, walk); - if (!err && walk->pte_entry) - err = walk_pte_range(pmd, addr, next, walk); + if (err) + break; + + /* + * Check this here so we only break down trans_huge + * pages when we _need_ to + */ + if (!walk->pte_entry) + continue; + + split_huge_page_pmd(walk->mm, pmd); + if (pmd_none_or_clear_bad(pmd)) + goto again; + err = walk_pte_range(pmd, addr, next, walk); if (err) break; } while (pmd++, addr = next, addr != end); -- cgit v1.2.3-70-g09d2 From 31db58b3ab432f72ea76be58b12e6ffaf627d5db Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:15 -0400 Subject: mm: arch: make get_gate_vma take an mm_struct instead of a task_struct Morally, the presence of a gate vma is more an attribute of a particular mm than a particular task. Moreover, dropping the dependency on task_struct will help make both existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 6 +++--- arch/x86/vdso/vdso32-setup.c | 11 ++++++----- fs/binfmt_elf.c | 2 +- fs/proc/task_mmu.c | 8 +++++--- include/linux/mm.h | 2 +- mm/memory.c | 4 ++-- mm/mlock.c | 4 ++-- 10 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include/linux/mm.h') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729ab..6169f175693 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -830,7 +830,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedb..d19f30504c6 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -347,7 +347,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 242117cbad6..3f9b6f41813 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -94,7 +94,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *task) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0aa34669ed3..dd4809b5844 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -861,10 +861,10 @@ static struct vm_area_struct gate_vma = { .vm_flags = VM_READ | VM_EXEC }; -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) + if (!mm || mm->context.ia32_compat) return NULL; #endif return &gate_vma; @@ -872,7 +872,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task); + struct vm_area_struct *vma = get_gate_vma(task->mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 36df991985b..1f651f6bdf6 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -417,11 +417,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - struct mm_struct *mm = tsk->mm; - - /* Check to see if this task was created in compat vdso mode */ + /* + * Check to see if the corresponding task was created in compat vdso + * mode. + */ if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) return &gate_vma; return NULL; @@ -429,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task); + const struct vm_area_struct *vma = get_gate_vma(task->mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d5b640ba6cb..bbabdcce117 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); - gate_vma = get_gate_vma(current); + gate_vma = get_gate_vma(current->mm); if (gate_vma != NULL) segs++; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8fed0f88fbf..e73314afc53 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -126,7 +126,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) return mm; down_read(&mm->mmap_sem); - tail_vma = get_gate_vma(priv->task); + tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; /* Start with last addr hint */ @@ -277,7 +277,8 @@ static int show_map(struct seq_file *m, void *v) show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -436,7 +437,8 @@ static int show_smap(struct seq_file *m, void *v) (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 581703d86fb..18b4a6358ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1578,7 +1578,7 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif -extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); +extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA int in_gate_area_no_task(unsigned long addr); int in_gate_area(struct task_struct *task, unsigned long addr); diff --git a/mm/memory.c b/mm/memory.c index e48945ab362..b6dc3709743 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1488,7 +1488,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vma = find_extend_vma(mm, start); if (!vma && in_gate_area(tsk, start)) { unsigned long pg = start & PAGE_MASK; - struct vm_area_struct *gate_vma = get_gate_vma(tsk); + struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -3496,7 +3496,7 @@ static int __init gate_vma_init(void) __initcall(gate_vma_init); #endif -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef AT_SYSINFO_EHDR return &gate_vma; diff --git a/mm/mlock.c b/mm/mlock.c index c3924c7f00b..2689a08c79a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) { + vma == get_gate_vma(current->mm))) { __mlock_vma_pages_range(vma, start, end, NULL); @@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, int lock = newflags & VM_LOCKED; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current)) + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) goto out; /* don't set VM_LOCKED, don't count */ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); -- cgit v1.2.3-70-g09d2 From 83b964bbf82eb13a8f31bb49ca420787fe01f7a6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:16 -0400 Subject: mm: arch: make in_gate_area take an mm_struct instead of a task_struct Morally, the question of whether an address lies in a gate vma should be asked with respect to an mm, not a particular task. Moreover, dropping the dependency on task_struct will help make existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/vdso/vdso32-setup.c | 4 ++-- include/linux/mm.h | 4 ++-- mm/memory.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux/mm.h') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6169f175693..467aa9ecbf9 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -825,7 +825,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d19f30504c6..9006e966ef0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -342,7 +342,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 3f9b6f41813..62c36a8961d 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -99,7 +99,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long address) +int in_gate_area(struct mm_struct *mm, unsigned long address) { return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dd4809b5844..43c441622c8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -870,9 +870,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return &gate_vma; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task->mm); + struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 1f651f6bdf6..f849bb29fda 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -428,9 +428,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task->mm); + const struct vm_area_struct *vma = get_gate_vma(mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 18b4a6358ab..5c6d916cd30 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1581,10 +1581,10 @@ static inline bool kernel_page_present(struct page *page) { return true; } extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA int in_gate_area_no_task(unsigned long addr); -int in_gate_area(struct task_struct *task, unsigned long addr); +int in_gate_area(struct mm_struct *mm, unsigned long addr); #else int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) +#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ int drop_caches_sysctl_handler(struct ctl_table *, int, diff --git a/mm/memory.c b/mm/memory.c index b6dc3709743..931d479b80c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1486,7 +1486,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct *vma; vma = find_extend_vma(mm, start); - if (!vma && in_gate_area(tsk, start)) { + if (!vma && in_gate_area(tsk->mm, start)) { unsigned long pg = start & PAGE_MASK; struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); pgd_t *pgd; -- cgit v1.2.3-70-g09d2 From cae5d39032acf26c265f6b1dc73d7ce6ff4bc387 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:17 -0400 Subject: mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm Now that gate vma's are referenced with respect to a particular mm and not a particular task it only makes sense to propagate the change to this predicate as well. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 8 ++++---- arch/x86/vdso/vdso32-setup.c | 2 +- include/linux/mm.h | 6 +++--- kernel/kallsyms.c | 4 ++-- mm/memory.c | 2 +- mm/nommu.c | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux/mm.h') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 467aa9ecbf9..142ab1008c3 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -820,7 +820,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 9006e966ef0..d73630b4fe1 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -337,7 +337,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 62c36a8961d..1d6d51a1ce7 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -104,7 +104,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long address) return 0; } -int in_gate_area_no_task(unsigned long address) +int in_gate_area_no_mm(unsigned long address) { return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 43c441622c8..835393c8554 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -881,11 +881,11 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr) } /* - * Use this when you have no reliable task/vma, typically from interrupt - * context. It is less reliable than using the task's vma and may give - * false positives: + * Use this when you have no reliable mm, typically from interrupt + * context. It is less reliable than using a task's mm and may give + * false positives. */ -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index f849bb29fda..468d591dde3 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -435,7 +435,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr) return vma && addr >= vma->vm_start && addr < vma->vm_end; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 5c6d916cd30..9d6efefdde5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1580,11 +1580,11 @@ static inline bool kernel_page_present(struct page *page) { return true; } extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA -int in_gate_area_no_task(unsigned long addr); +int in_gate_area_no_mm(unsigned long addr); int in_gate_area(struct mm_struct *mm, unsigned long addr); #else -int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_task(addr);}) +int in_gate_area_no_mm(unsigned long addr); +#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ int drop_caches_sysctl_handler(struct ctl_table *, int, diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f6d091b575..b9d0fd1d21c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || arch_is_kernel_text(addr)) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static int is_ksym_addr(unsigned long addr) diff --git a/mm/memory.c b/mm/memory.c index 931d479b80c..5f5b5de5a40 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3505,7 +3505,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) #endif } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { #ifdef AT_SYSINFO_EHDR if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) diff --git a/mm/nommu.c b/mm/nommu.c index f59e1424d3d..e629143f944 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1963,7 +1963,7 @@ error: return -ENOMEM; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -- cgit v1.2.3-70-g09d2 From 5ddd36b9c59887c6416e21daf984fbdd9b1818df Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:20 -0400 Subject: mm: implement access_remote_vm Provide an alternative to access_process_vm that allows the caller to obtain a reference to the supplied mm_struct. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- include/linux/mm.h | 2 ++ mm/memory.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d6efefdde5..60011d26bff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -971,6 +971,8 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write); int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, unsigned int foll_flags, diff --git a/mm/memory.c b/mm/memory.c index 820b4c4810f..468f5076754 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3711,6 +3711,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, return buf - old_buf; } +/** + * @access_remote_vm - access another process' address space + * @mm: the mm_struct of the target address space + * @addr: start address to access + * @buf: source or destination buffer + * @len: number of bytes to transfer + * @write: whether the access is a write + * + * The caller must hold a reference on @mm. + */ +int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write) +{ + return __access_remote_vm(NULL, mm, addr, buf, len, write); +} + /* * Access another process' address space. * Source/target buffer must be kernel space, -- cgit v1.2.3-70-g09d2 From b2b755b5f10eb32fbdc73a9907c07006b17f714b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 24 Mar 2011 15:18:15 -0700 Subject: lib, arch: add filter argument to show_mem and fix private implementations Commit ddd588b5dd55 ("oom: suppress nodes that are not allowed from meminfo on oom kill") moved lib/show_mem.o out of lib/lib.a, which resulted in build warnings on all architectures that implement their own versions of show_mem(): lib/lib.a(show_mem.o): In function `show_mem': show_mem.c:(.text+0x1f4): multiple definition of `show_mem' arch/sparc/mm/built-in.o:(.text+0xd70): first defined here The fix is to remove __show_mem() and add its argument to show_mem() in all implementations to prevent this breakage. Architectures that implement their own show_mem() actually don't do anything with the argument yet, but they could be made to filter nodes that aren't allowed in the current context in the future just like the generic implementation. Reported-by: Stephen Rothwell Reported-by: James Bottomley Suggested-by: Andrew Morton Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/ia64/mm/contig.c | 2 +- arch/ia64/mm/discontig.c | 2 +- arch/parisc/mm/init.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/sparc/mm/init_32.c | 2 +- arch/tile/mm/pgtable.c | 2 +- arch/unicore32/mm/init.c | 2 +- drivers/tty/sysrq.c | 2 +- drivers/tty/vt/keyboard.c | 2 +- include/linux/mm.h | 5 ++--- lib/show_mem.c | 7 +------ mm/oom_kill.c | 2 +- mm/page_alloc.c | 2 +- 14 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux/mm.h') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b3b0f0f5053..e5f6fc42834 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -78,7 +78,7 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2); */ struct meminfo meminfo; -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; int shared = 0, cached = 0, slab = 0, i; diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 54bf5405981..9a018cde5d8 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -36,7 +36,7 @@ static unsigned long max_gap; * Shows a simple page count of reserved and used pages in the system. * For discontig machines, it does this on a per-pgdat basis. */ -void show_mem(void) +void show_mem(unsigned int filter) { int i, total_reserved = 0; int total_shared = 0, total_cached = 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 61620323bb6..82ab1bc6afb 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -614,7 +614,7 @@ void __cpuinit *per_cpu_init(void) * Shows a simple page count of reserved and used pages in the system. * For discontig machines, it does this on a per-pgdat basis. */ -void show_mem(void) +void show_mem(unsigned int filter) { int i, total_reserved = 0; int total_shared = 0, total_cached = 0; diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f4f4d700833..b7ed8d7a9b3 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -544,7 +544,7 @@ void __init mem_init(void) unsigned long *empty_zero_page __read_mostly; EXPORT_SYMBOL(empty_zero_page); -void show_mem(void) +void show_mem(unsigned int filter) { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d17d04cfb2c..33794c1d92c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -821,7 +821,7 @@ cmds(struct pt_regs *excp) memzcan(); break; case 'i': - show_mem(); + show_mem(0); break; default: termch = cmd; diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 6d0e02c4fe0..4c31e2b6e71 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -75,7 +75,7 @@ void __init kmap_init(void) kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); } -void show_mem(void) +void show_mem(unsigned int filter) { printk("Mem-info:\n"); show_free_areas(); diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 1a2b36f8866..de7d8e21e01 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -41,7 +41,7 @@ * The normal show_free_areas() is too verbose on Tile, with dozens * of processors and often four NUMA zones each with high and lowmem. */ -void show_mem(void) +void show_mem(unsigned int filter) { struct zone *zone; diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 3dbe3709b69..1fc02633f70 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -55,7 +55,7 @@ early_param("initrd", early_initrd); */ struct meminfo meminfo; -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; int shared = 0, cached = 0, slab = 0, i; diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 81f13958e75..43db715f150 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -306,7 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = { static void sysrq_handle_showmem(int key) { - show_mem(); + show_mem(0); } static struct sysrq_key_op sysrq_showmem_op = { .handler = sysrq_handle_showmem, diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 6dd3c68c13a..d6b342b5b42 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -600,7 +600,7 @@ static void fn_scroll_back(struct vc_data *vc) static void fn_show_mem(struct vc_data *vc) { - show_mem(); + show_mem(0); } static void fn_show_state(struct vc_data *vc) diff --git a/include/linux/mm.h b/include/linux/mm.h index f9535b2c955..7606d7db96c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -861,7 +861,7 @@ extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) /* - * Flags passed to __show_mem() and __show_free_areas() to suppress output in + * Flags passed to show_mem() and __show_free_areas() to suppress output in * various contexts. */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */ @@ -1360,8 +1360,7 @@ extern void setup_per_zone_wmarks(void); extern void calculate_zone_inactive_ratio(struct zone *zone); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(void); -extern void __show_mem(unsigned int flags); +extern void show_mem(unsigned int flags); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; diff --git a/lib/show_mem.c b/lib/show_mem.c index d8d602b58c3..90cbe4bb596 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -9,7 +9,7 @@ #include #include -void __show_mem(unsigned int filter) +void show_mem(unsigned int filter) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, shared = 0, @@ -61,8 +61,3 @@ void __show_mem(unsigned int filter) quicklist_total_size()); #endif } - -void show_mem(void) -{ - __show_mem(0); -} diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 62a5cec08a1..6a819d1b2c7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -406,7 +406,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, task_unlock(current); dump_stack(); mem_cgroup_print_oom_info(mem, p); - __show_mem(SHOW_MEM_FILTER_NODES); + show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) dump_tasks(mem, nodemask); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e5726ab0d8..d6e7ba7373b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2195,7 +2195,7 @@ nopage: current->comm, order, gfp_mask); dump_stack(); if (!should_suppress_show_mem()) - __show_mem(filter); + show_mem(filter); } return page; got_pg: -- cgit v1.2.3-70-g09d2