From 033193275b3ffcfe7f3fde7b569f3d207f6cd6a0 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 22 Mar 2011 16:32:56 -0700
Subject: pagewalk: only split huge pages when necessary

Right now, if a mm_walk has either ->pte_entry or ->pmd_entry set, it
will unconditionally split any transparent huge pages it runs into.  In
practice, that means that anyone doing a cat /proc/$pid/smaps will
unconditionally break down every huge page in the process and depend on
khugepaged to re-collapse it later.  This is fairly suboptimal.

This patch changes that behavior.  It teaches each ->pmd_entry handler
(there are five) that they must break down the THPs themselves.  Also,
the _generic_ code will never break down a THP unless a ->pte_entry
handler is actually set.

This means that the ->pmd_entry handlers can now choose to deal with
THPs without breaking them down.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Hansen
Acked-by: Mel Gorman
Acked-by: David Rientjes
Reviewed-by: Eric B Munson
Tested-by: Eric B Munson
Cc: Michael J Wolf
Cc: Andrea Arcangeli
Cc: Johannes Weiner
Cc: Matt Mackall
Cc: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 6 ++++++
 1 file changed, 6 insertions(+)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f8..78fd3621f56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -343,6 +343,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct page *page;
 	int mapcount;
 
+	split_huge_page_pmd(walk->mm, pmd);
+
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
@@ -467,6 +469,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	spinlock_t *ptl;
 	struct page *page;
 
+	split_huge_page_pmd(walk->mm, pmd);
+
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
@@ -623,6 +627,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	int err = 0;
 
+	split_huge_page_pmd(walk->mm, pmd);
+
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
 	for (; addr != end; addr += PAGE_SIZE) {
--
cgit v1.2.3-70-g09d2
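[Editorial illustration, not part of the series: under the contract the patch
above introduces, a pagewalk user that sets ->pmd_entry but only understands
4k PTEs must request the split itself; the generic walker no longer splits
unless ->pte_entry is set. A minimal sketch -- the handler name and the elided
PTE loop are hypothetical, while the callback signature and the
split_huge_page_pmd() call match the diff above.]

        static int pte_only_pmd_entry(pmd_t *pmd, unsigned long addr,
                                      unsigned long end, struct mm_walk *walk)
        {
                /*
                 * This walker cannot account a huge pmd in place, so have
                 * any THP mapped here broken back into small pages first.
                 */
                split_huge_page_pmd(walk->mm, pmd);

                /* ... then walk the now-ordinary ptes from addr to end ... */
                return 0;
        }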
From ae11c4d9f646064cf086e2f8cd4b3c475df7739c Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 22 Mar 2011 16:32:58 -0700
Subject: smaps: break out smaps_pte_entry() from smaps_pte_range()

We will use smaps_pte_entry() in a moment to handle both small and
transparent large pages.  But we must break it out of smaps_pte_range()
first.

Signed-off-by: Dave Hansen
Acked-by: Mel Gorman
Acked-by: Johannes Weiner
Acked-by: David Rientjes
Reviewed-by: Eric B Munson
Tested-by: Eric B Munson
Cc: Michael J Wolf
Cc: Andrea Arcangeli
Cc: Matt Mackall
Cc: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 87 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 47 insertions(+), 40 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 78fd3621f56..5cd06fa3106 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -333,56 +333,63 @@ struct mem_size_stats {
 	u64 pss;
 };
 
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-		struct mm_walk *walk)
+
+static void smaps_pte_entry(pte_t ptent, unsigned long addr,
+		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
-	pte_t *pte, ptent;
-	spinlock_t *ptl;
 	struct page *page;
 	int mapcount;
 
-	split_huge_page_pmd(walk->mm, pmd);
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	for (; addr != end; pte++, addr += PAGE_SIZE) {
-		ptent = *pte;
+	if (is_swap_pte(ptent)) {
+		mss->swap += PAGE_SIZE;
+		return;
+	}
 
-		if (is_swap_pte(ptent)) {
-			mss->swap += PAGE_SIZE;
-			continue;
-		}
+	if (!pte_present(ptent))
+		return;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (!page)
+		return;
+
+	if (PageAnon(page))
+		mss->anonymous += PAGE_SIZE;
+
+	mss->resident += PAGE_SIZE;
+	/* Accumulate the size in pages that have been accessed. */
+	if (pte_young(ptent) || PageReferenced(page))
+		mss->referenced += PAGE_SIZE;
+	mapcount = page_mapcount(page);
+	if (mapcount >= 2) {
+		if (pte_dirty(ptent) || PageDirty(page))
+			mss->shared_dirty += PAGE_SIZE;
+		else
+			mss->shared_clean += PAGE_SIZE;
+		mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+	} else {
+		if (pte_dirty(ptent) || PageDirty(page))
+			mss->private_dirty += PAGE_SIZE;
+		else
+			mss->private_clean += PAGE_SIZE;
+		mss->pss += (PAGE_SIZE << PSS_SHIFT);
+	}
+}
 
-		if (!pte_present(ptent))
-			continue;
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct mm_walk *walk)
+{
+	struct mem_size_stats *mss = walk->private;
+	struct vm_area_struct *vma = mss->vma;
+	pte_t *pte;
+	spinlock_t *ptl;
 
-		page = vm_normal_page(vma, addr, ptent);
-		if (!page)
-			continue;
+	split_huge_page_pmd(walk->mm, pmd);
 
-		if (PageAnon(page))
-			mss->anonymous += PAGE_SIZE;
-
-		mss->resident += PAGE_SIZE;
-		/* Accumulate the size in pages that have been accessed. */
-		if (pte_young(ptent) || PageReferenced(page))
-			mss->referenced += PAGE_SIZE;
-		mapcount = page_mapcount(page);
-		if (mapcount >= 2) {
-			if (pte_dirty(ptent) || PageDirty(page))
-				mss->shared_dirty += PAGE_SIZE;
-			else
-				mss->shared_clean += PAGE_SIZE;
-			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
-		} else {
-			if (pte_dirty(ptent) || PageDirty(page))
-				mss->private_dirty += PAGE_SIZE;
-			else
-				mss->private_clean += PAGE_SIZE;
-			mss->pss += (PAGE_SIZE << PSS_SHIFT);
-		}
-	}
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		smaps_pte_entry(*pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;
--
cgit v1.2.3-70-g09d2
From 3c9acc7849b1eab7ffc75e933404c5f32865d9a2 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 22 Mar 2011 16:32:59 -0700
Subject: smaps: pass pte size argument into smaps_pte_entry()

Add an argument to the new smaps_pte_entry() function to let it account
in units other than PAGE_SIZE.  I changed all of the PAGE_SIZE sites,
even though not all of them can be reached for transparent huge pages,
just so this will continue to work without changes as THPs are improved.

Signed-off-by: Dave Hansen
Acked-by: Mel Gorman
Acked-by: Johannes Weiner
Acked-by: David Rientjes
Reviewed-by: Eric B Munson
Tested-by: Eric B Munson
Cc: Michael J Wolf
Cc: Andrea Arcangeli
Cc: Matt Mackall
Cc: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5cd06fa3106..d7e2af33407 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -335,7 +335,7 @@ struct mem_size_stats {
 
 static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-		struct mm_walk *walk)
+		unsigned long ptent_size, struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
@@ -343,7 +343,7 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
 	int mapcount;
 
 	if (is_swap_pte(ptent)) {
-		mss->swap += PAGE_SIZE;
+		mss->swap += ptent_size;
 		return;
 	}
 
@@ -355,25 +355,25 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
 		return;
 
 	if (PageAnon(page))
-		mss->anonymous += PAGE_SIZE;
+		mss->anonymous += ptent_size;
 
-	mss->resident += PAGE_SIZE;
+	mss->resident += ptent_size;
 	/* Accumulate the size in pages that have been accessed. */
 	if (pte_young(ptent) || PageReferenced(page))
-		mss->referenced += PAGE_SIZE;
+		mss->referenced += ptent_size;
 	mapcount = page_mapcount(page);
 	if (mapcount >= 2) {
 		if (pte_dirty(ptent) || PageDirty(page))
-			mss->shared_dirty += PAGE_SIZE;
+			mss->shared_dirty += ptent_size;
 		else
-			mss->shared_clean += PAGE_SIZE;
-		mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+			mss->shared_clean += ptent_size;
+		mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
 	} else {
 		if (pte_dirty(ptent) || PageDirty(page))
-			mss->private_dirty += PAGE_SIZE;
+			mss->private_dirty += ptent_size;
 		else
-			mss->private_clean += PAGE_SIZE;
-		mss->pss += (PAGE_SIZE << PSS_SHIFT);
+			mss->private_clean += ptent_size;
+		mss->pss += (ptent_size << PSS_SHIFT);
 	}
 }
 
@@ -389,7 +389,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		smaps_pte_entry(*pte, addr, walk);
+		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;
--
cgit v1.2.3-70-g09d2
From 22e057c5923e60debad318cbeaee33033b110bc8 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 22 Mar 2011 16:33:00 -0700
Subject: smaps: teach smaps_pte_range() about THP pmds

This adds code to explicitly detect and handle pmd_trans_huge() pmds.
It then passes HPAGE_PMD_SIZE units into the smaps_pte_entry() function
instead of PAGE_SIZE.  This means that using /proc/$pid/smaps will no
longer cause THPs to be broken down into small pages.

Signed-off-by: Dave Hansen
Reviewed-by: Eric B Munson
Tested-by: Eric B Munson
Acked-by: Andrea Arcangeli
Acked-by: David Rientjes
Cc: Mel Gorman
Cc: Michael J Wolf
Cc: Andrea Arcangeli
Cc: Johannes Weiner
Cc: Matt Mackall
Cc: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index d7e2af33407..26f9cc00102 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
@@ -7,6 +8,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
+#include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 
@@ -385,8 +387,25 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	split_huge_page_pmd(walk->mm, pmd);
-
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(vma->anon_vma, pmd);
+		} else {
+			smaps_pte_entry(*(pte_t *)pmd, addr,
+					HPAGE_PMD_SIZE, walk);
+			spin_unlock(&walk->mm->page_table_lock);
+			return 0;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
+	/*
+	 * The mmap_sem held all the way back in m_start() is what
+	 * keeps khugepaged out of here and from collapsing things
+	 * in here.
+	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
 		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
--
cgit v1.2.3-70-g09d2
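[Editorial illustration, not part of the series: with this patch, reading
smaps no longer splits THPs, and the next patch below adds an
"AnonHugePages:" field, so a process can measure its own THP coverage without
perturbing it. A minimal userspace sketch under those assumptions:]

        #include <stdio.h>

        int main(void)
        {
                char line[256];
                unsigned long kb, total = 0;
                FILE *f = fopen("/proc/self/smaps", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))
                        if (sscanf(line, "AnonHugePages: %lu kB", &kb) == 1)
                                total += kb;    /* sum across all VMAs */
                fclose(f);
                printf("AnonHugePages total: %lu kB\n", total);
                return 0;
        }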
From 4031a219d8913da40ade5a6e5b538cc61e975cc8 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Tue, 22 Mar 2011 16:33:01 -0700
Subject: smaps: have smaps show transparent huge pages

Now that the mere act of _looking_ at /proc/$pid/smaps will not destroy
transparent huge pages, tell how much of the VMA is actually mapped with
them.  This way, we can make sure that we're getting THPs where we
expect to see them.

Signed-off-by: Dave Hansen
Acked-by: Mel Gorman
Acked-by: David Rientjes
Reviewed-by: Eric B Munson
Tested-by: Eric B Munson
Cc: Michael J Wolf
Cc: Andrea Arcangeli
Cc: Johannes Weiner
Cc: Matt Mackall
Cc: Jeremy Fitzhardinge
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 26f9cc00102..93381aae936 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -331,6 +331,7 @@ struct mem_size_stats {
 	unsigned long private_dirty;
 	unsigned long referenced;
 	unsigned long anonymous;
+	unsigned long anonymous_thp;
 	unsigned long swap;
 	u64 pss;
 };
@@ -396,6 +397,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			smaps_pte_entry(*(pte_t *)pmd, addr,
 					HPAGE_PMD_SIZE, walk);
 			spin_unlock(&walk->mm->page_table_lock);
+			mss->anonymous_thp += HPAGE_PMD_SIZE;
 			return 0;
 		}
 	} else {
@@ -444,6 +446,7 @@ static int show_smap(struct seq_file *m, void *v)
 		   "Private_Dirty:  %8lu kB\n"
 		   "Referenced:     %8lu kB\n"
 		   "Anonymous:      %8lu kB\n"
+		   "AnonHugePages:  %8lu kB\n"
 		   "Swap:           %8lu kB\n"
 		   "KernelPageSize: %8lu kB\n"
 		   "MMUPageSize:    %8lu kB\n"
@@ -457,6 +460,7 @@ static int show_smap(struct seq_file *m, void *v)
 		   mss.private_dirty >> 10,
 		   mss.referenced >> 10,
 		   mss.anonymous >> 10,
+		   mss.anonymous_thp >> 10,
 		   mss.swap >> 10,
 		   vma_kernel_pagesize(vma) >> 10,
 		   vma_mmu_pagesize(vma) >> 10,
--
cgit v1.2.3-70-g09d2

From ca6b0bf0e086513b9ee5efc0aa5770ecb57778af Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 15 Feb 2011 22:04:37 -0500
Subject: pagemap: close races with suid execve

just use mm_for_maps()

Signed-off-by: Al Viro
---
 fs/proc/base.c     | 4 ++--
 fs/proc/task_mmu.c | 7 ++-----
 2 files changed, 4 insertions(+), 7 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b77236de6d8..df73573e12c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2797,7 +2797,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
 	REG("smaps", S_IRUGO, proc_smaps_operations),
-	REG("pagemap", S_IRUSR, proc_pagemap_operations),
+	REG("pagemap", S_IRUGO, proc_pagemap_operations),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3133,7 +3133,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
 	REG("smaps", S_IRUGO, proc_smaps_operations),
-	REG("pagemap", S_IRUSR, proc_pagemap_operations),
+	REG("pagemap", S_IRUGO, proc_pagemap_operations),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f8..c966413c139 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -729,7 +729,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		goto out;
 
 	ret = -EACCES;
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+	mm = mm_for_maps(task);
+	if (!mm)
 		goto out_task;
 
 	ret = -EINVAL;
@@ -742,10 +743,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!count)
 		goto out_task;
 
-	mm = get_task_mm(task);
-	if (!mm)
-		goto out_task;
-
 	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
 	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
 	ret = -ENOMEM;
--
cgit v1.2.3-70-g09d2

From ec6fd8a4355cda81cd9f06bebc048e83eb514ac7 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 15 Feb 2011 22:22:54 -0500
Subject: report errors in /proc/*/*map* sanely

Signed-off-by: Al Viro
---
 fs/proc/base.c       |  8 +++++---
 fs/proc/task_mmu.c   | 10 +++++-----
 fs/proc/task_nommu.c |  6 +++---
 3 files changed, 13 insertions(+), 11 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index df73573e12c..c2828110208 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -225,15 +225,17 @@ static int check_mem_permission(struct task_struct *task)
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
 	struct mm_struct *mm;
+	int err;
 
-	if (mutex_lock_killable(&task->signal->cred_guard_mutex))
-		return NULL;
+	err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
 
 	mm = get_task_mm(task);
 	if (mm && mm != current->mm &&
 			!ptrace_may_access(task, PTRACE_MODE_READ)) {
 		mmput(mm);
-		mm = NULL;
+		mm = ERR_PTR(-EACCES);
 	}
 	mutex_unlock(&task->signal->cred_guard_mutex);
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c966413c139..8fed0f88fbf 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,11 +119,11 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
 	if (!priv->task)
-		return NULL;
+		return ERR_PTR(-ESRCH);
 
 	mm = mm_for_maps(priv->task);
-	if (!mm)
-		return NULL;
+	if (!mm || IS_ERR(mm))
+		return mm;
 	down_read(&mm->mmap_sem);
 
 	tail_vma = get_gate_vma(priv->task);
@@ -728,9 +728,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!task)
 		goto out;
 
-	ret = -EACCES;
 	mm = mm_for_maps(task);
-	if (!mm)
+	ret = PTR_ERR(mm);
+	if (!mm || IS_ERR(mm))
 		goto out_task;
 
 	ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index b535d3e5d5f..980de547c07 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	/* pin the task and mm whilst we play with them */
 	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
 	if (!priv->task)
-		return NULL;
+		return ERR_PTR(-ESRCH);
 
 	mm = mm_for_maps(priv->task);
-	if (!mm) {
+	if (!mm || IS_ERR(mm)) {
 		put_task_struct(priv->task);
 		priv->task = NULL;
-		return NULL;
+		return mm;
 	}
 	down_read(&mm->mmap_sem);
 
--
cgit v1.2.3-70-g09d2
From 31db58b3ab432f72ea76be58b12e6ffaf627d5db Mon Sep 17 00:00:00 2001
From: Stephen Wilson
Date: Sun, 13 Mar 2011 15:49:15 -0400
Subject: mm: arch: make get_gate_vma take an mm_struct instead of a task_struct

Morally, the presence of a gate vma is more an attribute of a particular
mm than a particular task.  Moreover, dropping the dependency on
task_struct will help make both existing and future operations on mm's
more flexible and convenient.

Signed-off-by: Stephen Wilson
Reviewed-by: Michel Lespinasse
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Signed-off-by: Al Viro
---
 arch/powerpc/kernel/vdso.c         |  2 +-
 arch/s390/kernel/vdso.c            |  2 +-
 arch/sh/kernel/vsyscall/vsyscall.c |  2 +-
 arch/x86/mm/init_64.c              |  6 +++---
 arch/x86/vdso/vdso32-setup.c       | 11 ++++++-----
 fs/binfmt_elf.c                    |  2 +-
 fs/proc/task_mmu.c                 |  8 +++++---
 include/linux/mm.h                 |  2 +-
 mm/memory.c                        |  4 ++--
 mm/mlock.c                         |  4 ++--
 10 files changed, 23 insertions(+), 20 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index fd8728729ab..6169f175693 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -830,7 +830,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
 	return 0;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 	return NULL;
 }
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index f438d74dedb..d19f30504c6 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -347,7 +347,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
 	return 0;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 	return NULL;
 }
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 242117cbad6..3f9b6f41813 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -94,7 +94,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *task)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 	return NULL;
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0aa34669ed3..dd4809b5844 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -861,10 +861,10 @@ static struct vm_area_struct gate_vma = {
 	.vm_flags	= VM_READ | VM_EXEC
 };
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(tsk, TIF_IA32))
+	if (!mm || mm->context.ia32_compat)
 		return NULL;
 #endif
 	return &gate_vma;
@@ -872,7 +872,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 
 int in_gate_area(struct task_struct *task, unsigned long addr)
 {
-	struct vm_area_struct *vma = get_gate_vma(task);
+	struct vm_area_struct *vma = get_gate_vma(task->mm);
 
 	if (!vma)
 		return 0;
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 36df991985b..1f651f6bdf6 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -417,11 +417,12 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	struct mm_struct *mm = tsk->mm;
-
-	/* Check to see if this task was created in compat vdso mode */
+	/*
+	 * Check to see if the corresponding task was created in compat vdso
+	 * mode.
+	 */
 	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
 		return &gate_vma;
 	return NULL;
@@ -429,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 
 int in_gate_area(struct task_struct *task, unsigned long addr)
 {
-	const struct vm_area_struct *vma = get_gate_vma(task);
+	const struct vm_area_struct *vma = get_gate_vma(task->mm);
 
 	return vma && addr >= vma->vm_start && addr < vma->vm_end;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d5b640ba6cb..bbabdcce117 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	segs = current->mm->map_count;
 	segs += elf_core_extra_phdrs();
 
-	gate_vma = get_gate_vma(current);
+	gate_vma = get_gate_vma(current->mm);
 	if (gate_vma != NULL)
 		segs++;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8fed0f88fbf..e73314afc53 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -126,7 +126,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 		return mm;
 	down_read(&mm->mmap_sem);
 
-	tail_vma = get_gate_vma(priv->task);
+	tail_vma = get_gate_vma(priv->task->mm);
 	priv->tail_vma = tail_vma;
 
 	/* Start with last addr hint */
@@ -277,7 +277,8 @@ static int show_map(struct seq_file *m, void *v)
 	show_map_vma(m, vma);
 
 	if (m->count < m->size)  /* vma is copied successfully */
-		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
+		m->version = (vma != get_gate_vma(task->mm))
+			? vma->vm_start : 0;
 	return 0;
 }
 
@@ -436,7 +437,8 @@ static int show_smap(struct seq_file *m, void *v)
 		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
 
 	if (m->count < m->size)  /* vma is copied successfully */
-		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+		m->version = (vma != get_gate_vma(task->mm))
+			? vma->vm_start : 0;
 	return 0;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 581703d86fb..18b4a6358ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1578,7 +1578,7 @@ static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
 #endif
 
-extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
+extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
 #ifdef __HAVE_ARCH_GATE_AREA
 int in_gate_area_no_task(unsigned long addr);
 int in_gate_area(struct task_struct *task, unsigned long addr);
diff --git a/mm/memory.c b/mm/memory.c
index e48945ab362..b6dc3709743 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1488,7 +1488,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		vma = find_extend_vma(mm, start);
 		if (!vma && in_gate_area(tsk, start)) {
 			unsigned long pg = start & PAGE_MASK;
-			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
+			struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm);
 			pgd_t *pgd;
 			pud_t *pud;
 			pmd_t *pmd;
@@ -3496,7 +3496,7 @@ static int __init gate_vma_init(void)
 __initcall(gate_vma_init);
 #endif
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 #ifdef AT_SYSINFO_EHDR
 	return &gate_vma;
diff --git a/mm/mlock.c b/mm/mlock.c
index c3924c7f00b..2689a08c79a 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current))) {
+			vma == get_gate_vma(current->mm))) {
 
 		__mlock_vma_pages_range(vma, start, end, NULL);
 
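[Editorial note, not from the patch: the fixed condition is the standard
interval-overlap test. A VMA is labelled "[heap]" exactly when
[vm_start, vm_end) intersects the brk area [start_brk, brk], which covers all
three cases above. Restated standalone, with a hypothetical helper name:]

        /* True iff the VMA overlaps the brk area at all. */
        static int vma_overlaps_heap(unsigned long vm_start, unsigned long vm_end,
                                     unsigned long start_brk, unsigned long brk)
        {
                return vm_start <= brk && vm_end >= start_brk;
        }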
@@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	int lock = newflags & VM_LOCKED;
 
 	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
-	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current))
+	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
 		goto out;	/* don't set VM_LOCKED, don't count */
 
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
--
cgit v1.2.3-70-g09d2

From 0db0c01b53a1a421513f91573241aabafb87802a Mon Sep 17 00:00:00 2001
From: Aaro Koskinen
Date: Wed, 23 Mar 2011 16:42:50 -0700
Subject: procfs: fix /proc/<pid>/maps heap check

The current code fails to print the "[heap]" marking if the heap is
split into multiple mappings.

Fix the check so that the marking is displayed in all possible cases:
	1. vma matches exactly the heap
	2. the heap vma is merged e.g. with bss
	3. the heap vma is split e.g. due to locked pages

Test cases. In all cases, the process should have mapping(s) with
[heap] marking:

(1) vma matches exactly the heap

	#include <unistd.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main (void)
	{
		if (sbrk(4096) != (void *)-1) {
			printf("check /proc/%d/maps\n", (int)getpid());
			while (1)
				sleep(1);
		}
		return 0;
	}

	# ./test1
	check /proc/553/maps
	[1] + Stopped                    ./test1
	# cat /proc/553/maps | head -4
	00008000-00009000 r-xp 00000000 01:00 3113640    /test1
	00010000-00011000 rw-p 00000000 01:00 3113640    /test1
	00011000-00012000 rw-p 00000000 00:00 0          [heap]
	4006f000-40070000 rw-p 00000000 00:00 0

(2) the heap vma is merged

	#include <unistd.h>
	#include <stdio.h>
	#include <stdlib.h>

	char foo[4096] = "foo";
	char bar[4096];

	int main (void)
	{
		if (sbrk(4096) != (void *)-1) {
			printf("check /proc/%d/maps\n", (int)getpid());
			while (1)
				sleep(1);
		}
		return 0;
	}

	# ./test2
	check /proc/556/maps
	[2] + Stopped                    ./test2
	# cat /proc/556/maps | head -4
	00008000-00009000 r-xp 00000000 01:00 3116312    /test2
	00010000-00012000 rw-p 00000000 01:00 3116312    /test2
	00012000-00014000 rw-p 00000000 00:00 0          [heap]
	4004a000-4004b000 rw-p 00000000 00:00 0

(3) the heap vma is split (this fails without the patch)

	#include <unistd.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>

	int main (void)
	{
		if ((sbrk(4096) != (void *)-1) && !mlockall(MCL_FUTURE) &&
		    (sbrk(4096) != (void *)-1)) {
			printf("check /proc/%d/maps\n", (int)getpid());
			while (1)
				sleep(1);
		}
		return 0;
	}

	# ./test3
	check /proc/559/maps
	[1] + Stopped                    ./test3
	# cat /proc/559/maps|head -4
	00008000-00009000 r-xp 00000000 01:00 3119108    /test3
	00010000-00011000 rw-p 00000000 01:00 3119108    /test3
	00011000-00012000 rw-p 00000000 00:00 0          [heap]
	00012000-00013000 rw-p 00000000 00:00 0          [heap]

It looks like the bug has been there forever, and since it only results
in some information missing from a procfile, it does not fulfil the
-stable "critical issue" criteria.
Signed-off-by: Aaro Koskinen
Reviewed-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 93381aae936..636f1a1fdf8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -251,8 +251,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 		const char *name = arch_vma_name(vma);
 		if (!name) {
 			if (mm) {
-				if (vma->vm_start <= mm->start_brk &&
-				    vma->vm_end >= mm->brk) {
+				if (vma->vm_start <= mm->brk &&
+				    vma->vm_end >= mm->start_brk) {
 					name = "[heap]";
 				} else if (vma->vm_start <= mm->start_stack &&
 					   vma->vm_end >= mm->start_stack) {
--
cgit v1.2.3-70-g09d2

From 76597cd31470fa130784c78fadb4dab2e624a723 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 27 Mar 2011 19:09:29 -0700
Subject: proc: fix oops on invalid /proc/<pid>/maps access

When m_start returns an error, the seq_file logic will still call m_stop
with that error entry, so we'd better make sure that we check it before
using it as a vma.

Introduced by commit ec6fd8a4355c ("report errors in /proc/*/*map*
sanely"), which replaced NULL with various ERR_PTR() cases.

(On ia64, you happen to get an unaligned fault instead of a page fault,
since the address used is generally some random error code like -EPERM)

Reported-by: Anca Emanuel
Reported-by: Tony Luck
Cc: Al Viro
Cc: Américo Wang
Cc: Stephen Wilson
Signed-off-by: Linus Torvalds
---
 fs/proc/task_mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'fs/proc/task_mmu.c')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c708a418ac..2e7addfd980 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -182,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v)
 	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
 
-	vma_stop(priv, vma);
+	if (!IS_ERR(vma))
+		vma_stop(priv, vma);
 	if (priv->task)
 		put_task_struct(priv->task);
 }
--
cgit v1.2.3-70-g09d2
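[Editorial summary, not from the patches: after the error-reporting changes
above, mm_for_maps() can return a real mm, NULL, or an ERR_PTR() value, and
both the seq_file ->start and ->stop callbacks must triage all three; this is
the convention the oops fix above restores. A condensed sketch using the
linux/err.h helpers; the callback names, the task variable, and the elided
bodies are hypothetical:]

        #include <linux/err.h>

        static void *my_start(struct seq_file *m, loff_t *pos)
        {
                /* task would be pinned by the caller, as in pagemap_read() */
                struct mm_struct *mm = mm_for_maps(task);

                if (!mm || IS_ERR(mm))
                        return mm;      /* propagate NULL or ERR_PTR() as-is */
                /* ... walk to *pos and return a real iterator ... */
        }

        static void my_stop(struct seq_file *m, void *v)
        {
                /* seq_file hands m_start()'s return back, errors included */
                if (IS_ERR_OR_NULL(v))
                        return;
                /* ... release the real iterator ... */
        }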