author | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:18:27 +0200
---|---|---
committer | Ingo Molnar <mingo@kernel.org> | 2012-04-14 13:19:04 +0200
commit | 6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch) |
tree | 021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /mm/memory.c |
parent | 682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff) |
parent | a385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff) |
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree),
to prepare for tooling changes, and also to pick up v3.4 MM
changes that the uprobes code needs to take care of.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 198 |
1 file changed, 102 insertions, 96 deletions
diff --git a/mm/memory.c b/mm/memory.c
index fa2f04e0337..6105f475fa8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,17 +125,17 @@ core_initcall(init_zero_pfn);
 
 #if defined(SPLIT_RSS_COUNTING)
 
-static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+void sync_mm_rss(struct mm_struct *mm)
 {
 	int i;
 
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
-		if (task->rss_stat.count[i]) {
-			add_mm_counter(mm, i, task->rss_stat.count[i]);
-			task->rss_stat.count[i] = 0;
+		if (current->rss_stat.count[i]) {
+			add_mm_counter(mm, i, current->rss_stat.count[i]);
+			current->rss_stat.count[i] = 0;
 		}
 	}
-	task->rss_stat.events = 0;
+	current->rss_stat.events = 0;
 }
 
 static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
@@ -157,30 +157,7 @@ static void check_sync_rss_stat(struct task_struct *task)
 	if (unlikely(task != current))
 		return;
 	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
-		__sync_task_rss_stat(task, task->mm);
-}
-
-unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
-	long val = 0;
-
-	/*
-	 * Don't use task->mm here...for avoiding to use task_get_mm()..
-	 * The caller must guarantee task->mm is not invalid.
-	 */
-	val = atomic_long_read(&mm->rss_stat.count[member]);
-	/*
-	 * counter is updated in asynchronous manner and may go to minus.
-	 * But it's never be expected number for users.
-	 */
-	if (val < 0)
-		return 0;
-	return (unsigned long)val;
-}
-
-void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
-{
-	__sync_task_rss_stat(task, mm);
+		sync_mm_rss(task->mm);
 }
 
 #else /* SPLIT_RSS_COUNTING */
@@ -661,7 +638,7 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
 	int i;
 
 	if (current->mm == mm)
-		sync_mm_rss(current, mm);
+		sync_mm_rss(mm);
 	for (i = 0; i < NR_MM_COUNTERS; i++)
 		if (rss[i])
 			add_mm_counter(mm, i, rss[i]);
@@ -1247,16 +1224,24 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 	do {
 		next = pmd_addr_end(addr, end);
 		if (pmd_trans_huge(*pmd)) {
-			if (next-addr != HPAGE_PMD_SIZE) {
+			if (next - addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
 			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
-				continue;
+				goto next;
 			/* fall through */
 		}
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
+		/*
+		 * Here there can be other concurrent MADV_DONTNEED or
+		 * trans huge page faults running, and if the pmd is
+		 * none or trans huge it can change under us. This is
+		 * because MADV_DONTNEED holds the mmap_sem in read
+		 * mode.
+		 */
+		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+			goto next;
 		next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+next:
 		cond_resched();
 	} while (pmd++, addr = next, addr != end);
 
@@ -1282,10 +1267,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	return addr;
 }
 
-static unsigned long unmap_page_range(struct mmu_gather *tlb,
-				struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end,
-				struct zap_details *details)
+static void unmap_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end,
+			     struct zap_details *details)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -1305,8 +1290,47 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 	mem_cgroup_uncharge_end();
+}
 
-	return addr;
+
+static void unmap_single_vma(struct mmu_gather *tlb,
+		struct vm_area_struct *vma, unsigned long start_addr,
+		unsigned long end_addr, unsigned long *nr_accounted,
+		struct zap_details *details)
+{
+	unsigned long start = max(vma->vm_start, start_addr);
+	unsigned long end;
+
+	if (start >= vma->vm_end)
+		return;
+	end = min(vma->vm_end, end_addr);
+	if (end <= vma->vm_start)
+		return;
+
+	if (vma->vm_flags & VM_ACCOUNT)
+		*nr_accounted += (end - start) >> PAGE_SHIFT;
+
+	if (unlikely(is_pfn_mapping(vma)))
+		untrack_pfn_vma(vma, 0, 0);
+
+	if (start != end) {
+		if (unlikely(is_vm_hugetlb_page(vma))) {
+			/*
+			 * It is undesirable to test vma->vm_file as it
+			 * should be non-null for valid hugetlb area.
+			 * However, vm_file will be NULL in the error
+			 * cleanup path of do_mmap_pgoff. When
+			 * hugetlbfs ->mmap method fails,
+			 * do_mmap_pgoff() nullifies vma->vm_file
+			 * before calling this function to clean up.
+			 * Since no pte has actually been setup, it is
+			 * safe to do nothing in this case.
+			 */
+			if (vma->vm_file)
+				unmap_hugepage_range(vma, start, end, NULL);
+		} else
+			unmap_page_range(tlb, vma, start, end, details);
+	}
 }
 
 /**
@@ -1318,8 +1342,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  * @details: details of nonlinear truncation or shared cache invalidation
  *
- * Returns the end address of the unmapping (restart addr if interrupted).
- *
  * Unmap all pages in the vma list.
  *
  * Only addresses between `start' and `end' will be unmapped.
@@ -1331,55 +1353,18 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
-	unsigned long start = start_addr;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
-	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
-		unsigned long end;
-
-		start = max(vma->vm_start, start_addr);
-		if (start >= vma->vm_end)
-			continue;
-		end = min(vma->vm_end, end_addr);
-		if (end <= vma->vm_start)
-			continue;
-
-		if (vma->vm_flags & VM_ACCOUNT)
-			*nr_accounted += (end - start) >> PAGE_SHIFT;
-
-		if (unlikely(is_pfn_mapping(vma)))
-			untrack_pfn_vma(vma, 0, 0);
-
-		while (start != end) {
-			if (unlikely(is_vm_hugetlb_page(vma))) {
-				/*
-				 * It is undesirable to test vma->vm_file as it
-				 * should be non-null for valid hugetlb area.
-				 * However, vm_file will be NULL in the error
-				 * cleanup path of do_mmap_pgoff. When
-				 * hugetlbfs ->mmap method fails,
-				 * do_mmap_pgoff() nullifies vma->vm_file
-				 * before calling this function to clean up.
-				 * Since no pte has actually been setup, it is
-				 * safe to do nothing in this case.
-				 */
-				if (vma->vm_file)
-					unmap_hugepage_range(vma, start, end, NULL);
-
-				start = end;
-			} else
-				start = unmap_page_range(tlb, vma, start, end, details);
-		}
-	}
-
+	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+		unmap_single_vma(tlb, vma, start_addr, end_addr, nr_accounted,
+				 details);
 	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
-	return start;	/* which is now the end (or restart) address */
 }
 
 /**
@@ -1388,8 +1373,10 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
  * @address: starting address of pages to zap
  * @size: number of bytes to zap
 * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * Caller must protect the VMA list
  */
-unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
+void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *details)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -1400,9 +1387,34 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
-	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	tlb_finish_mmu(&tlb, address, end);
+}
+
+/**
+ * zap_page_range_single - remove user pages in a given range
+ * @vma: vm_area_struct holding the applicable pages
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * The range must fit into one VMA.
+ */
+static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size, struct zap_details *details)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+	unsigned long end = address + size;
+	unsigned long nr_accounted = 0;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, 0);
+	update_hiwater_rss(mm);
+	mmu_notifier_invalidate_range_start(mm, address, end);
+	unmap_single_vma(&tlb, vma, address, end, &nr_accounted, details);
+	mmu_notifier_invalidate_range_end(mm, address, end);
 	tlb_finish_mmu(&tlb, address, end);
-	return end;
 }
 
 /**
@@ -1423,7 +1435,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 	if (address < vma->vm_start || address + size > vma->vm_end ||
 			!(vma->vm_flags & VM_PFNMAP))
 		return -1;
-	zap_page_range(vma, address, size, NULL);
+	zap_page_range_single(vma, address, size, NULL);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
@@ -2447,7 +2459,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 	 * fails, we just zero-fill it. Live with it.
 	 */
 	if (unlikely(!src)) {
-		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void *kaddr = kmap_atomic(dst);
 		void __user *uaddr = (void __user *)(va & PAGE_MASK);
 
 		/*
@@ -2458,7 +2470,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
 		 */
 		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
 			clear_page(kaddr);
-		kunmap_atomic(kaddr, KM_USER0);
+		kunmap_atomic(kaddr);
 		flush_dcache_page(dst);
 	} else
 		copy_user_highpage(dst, src, va, vma);
@@ -2770,7 +2782,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
 {
-	zap_page_range(vma, start_addr, end_addr - start_addr, details);
+	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
 static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -3611,13 +3623,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
 	gate_vma.vm_page_prot = __P101;
-	/*
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	gate_vma.vm_flags |= VM_ALWAYSDUMP;
+
 	return 0;
 }
 __initcall(gate_vma_init);
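
Two of the v3.4 MM changes visible in this diff are caller-visible API changes that code merged on top of this branch (such as the uprobes work mentioned in the commit message) has to follow: sync_mm_rss() lost its task_struct argument and now always operates on current, and kmap_atomic()/kunmap_atomic() no longer take a KM_USER0 slot argument. The fragment below is not part of the commit; it is a minimal sketch of a hypothetical caller (touch_page_and_flush_rss() is an invented name) showing the before/after call patterns.

/*
 * Hypothetical example, not part of this commit: touch_page_and_flush_rss()
 * is an invented helper that only illustrates the post-merge calling
 * conventions of kmap_atomic()/kunmap_atomic() and sync_mm_rss().
 */
#include <linux/mm.h>
#include <linux/highmem.h>

static void touch_page_and_flush_rss(struct mm_struct *mm, struct page *page)
{
	/* Old form: void *kaddr = kmap_atomic(page, KM_USER0); */
	void *kaddr = kmap_atomic(page);	/* the km_type slot argument is gone */

	clear_page(kaddr);

	/* Old form: kunmap_atomic(kaddr, KM_USER0); */
	kunmap_atomic(kaddr);

	/* Old form: sync_mm_rss(current, mm); */
	sync_mm_rss(mm);			/* now implicitly flushes current's counters */
}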