Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 117
1 file changed, 64 insertions, 53 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 1cc6bfbd872..c6565f00fb3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -220,9 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 	/* Is it from 0 to ~0? */
 	tlb->fullmm     = !(start | (end+1));
 	tlb->need_flush_all = 0;
-	tlb->start	= start;
-	tlb->end	= end;
-	tlb->need_flush = 0;
 	tlb->local.next = NULL;
 	tlb->local.nr   = 0;
 	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
@@ -232,15 +229,18 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
 #endif
+
+	__tlb_reset_range(tlb);
 }
 
 static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	tlb->need_flush = 0;
 	tlb_flush(tlb);
+	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
+	__tlb_reset_range(tlb);
 }
 
 static void tlb_flush_mmu_free(struct mmu_gather *tlb)
@@ -256,8 +256,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 
 void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->need_flush)
+	if (!tlb->end)
 		return;
+
 	tlb_flush_mmu_tlbonly(tlb);
 	tlb_flush_mmu_free(tlb);
 }
@@ -292,7 +293,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
-	VM_BUG_ON(!tlb->need_flush);
+	VM_BUG_ON(!tlb->end);
 
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
@@ -359,8 +360,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
-	tlb->need_flush = 1;
-
 	/*
 	 * When there's less then two users of this mm there cannot be a
 	 * concurrent page-table walk.
@@ -815,20 +814,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		if (!pte_file(pte)) {
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
-			if (swap_duplicate(entry) < 0)
-				return entry.val;
-
-			/* make sure dst_mm is on swapoff's mmlist. */
-			if (unlikely(list_empty(&dst_mm->mmlist))) {
-				spin_lock(&mmlist_lock);
-				if (list_empty(&dst_mm->mmlist))
-					list_add(&dst_mm->mmlist,
-						 &src_mm->mmlist);
-				spin_unlock(&mmlist_lock);
-			}
-			if (likely(!non_swap_entry(entry)))
+			if (likely(!non_swap_entry(entry))) {
+				if (swap_duplicate(entry) < 0)
+					return entry.val;
+
+				/* make sure dst_mm is on swapoff's mmlist. */
+				if (unlikely(list_empty(&dst_mm->mmlist))) {
+					spin_lock(&mmlist_lock);
+					if (list_empty(&dst_mm->mmlist))
+						list_add(&dst_mm->mmlist,
+							 &src_mm->mmlist);
+					spin_unlock(&mmlist_lock);
+				}
 				rss[MM_SWAPENTS]++;
-			else if (is_migration_entry(entry)) {
+			} else if (is_migration_entry(entry)) {
 				page = migration_entry_to_page(entry);
 
 				if (PageAnon(page))
@@ -1147,6 +1146,7 @@ again:
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(!__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
+				addr += PAGE_SIZE;
 				break;
 			}
 			continue;
@@ -1185,20 +1185,8 @@ again:
 	arch_leave_lazy_mmu_mode();
 
 	/* Do the actual TLB flush before dropping ptl */
-	if (force_flush) {
-		unsigned long old_end;
-
-		/*
-		 * Flush the TLB just for the previous segment,
-		 * then update the range to be the remaining
-		 * TLB range.
-		 */
-		old_end = tlb->end;
-		tlb->end = addr;
+	if (force_flush)
 		tlb_flush_mmu_tlbonly(tlb);
-		tlb->start = addr;
-		tlb->end = old_end;
-	}
 	pte_unmap_unlock(start_pte, ptl);
 
 	/*
@@ -1339,9 +1327,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 			 * safe to do nothing in this case.
 			 */
			if (vma->vm_file) {
-				mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				i_mmap_lock_write(vma->vm_file->f_mapping);
 				__unmap_hugepage_range_final(tlb, vma, start, end, NULL);
-				mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				i_mmap_unlock_write(vma->vm_file->f_mapping);
 			}
 		} else
 			unmap_page_range(tlb, vma, start, end, details);
@@ -2149,17 +2137,24 @@ reuse:
 		if (!dirty_page)
 			return ret;
 
-		/*
-		 * Yes, Virginia, this is actually required to prevent a race
-		 * with clear_page_dirty_for_io() from clearing the page dirty
-		 * bit after it clear all dirty ptes, but before a racing
-		 * do_wp_page installs a dirty pte.
-		 *
-		 * do_shared_fault is protected similarly.
-		 */
 		if (!page_mkwrite) {
-			wait_on_page_locked(dirty_page);
-			set_page_dirty_balance(dirty_page);
+			struct address_space *mapping;
+			int dirtied;
+
+			lock_page(dirty_page);
+			dirtied = set_page_dirty(dirty_page);
+			VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
+			mapping = dirty_page->mapping;
+			unlock_page(dirty_page);
+
+			if (dirtied && mapping) {
+				/*
+				 * Some device drivers do not set page.mapping
+				 * but still dirty their pages
+				 */
+				balance_dirty_pages_ratelimited(mapping);
+			}
+
 			/* file_update_time outside page_lock */
 			if (vma->vm_file)
 				file_update_time(vma->vm_file);
@@ -2233,7 +2228,7 @@ gotten:
 	 * seen in the presence of one thread doing SMC and another
 	 * thread doing COW.
 	 */
-	ptep_clear_flush(vma, address, page_table);
+	ptep_clear_flush_notify(vma, address, page_table);
 	page_add_new_anon_rmap(new_page, vma, address);
 	mem_cgroup_commit_charge(new_page, memcg, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
@@ -2390,12 +2385,12 @@ void unmap_mapping_range(struct address_space *mapping,
 		details.last_index = ULONG_MAX;
 
 
-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
@@ -2605,7 +2600,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
 		if (prev && prev->vm_end == address)
 			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-		expand_downwards(vma, address - PAGE_SIZE);
+		return expand_downwards(vma, address - PAGE_SIZE);
 	}
 	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
 		struct vm_area_struct *next = vma->vm_next;
@@ -2614,7 +2609,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
 		if (next && next->vm_start == address + PAGE_SIZE)
 			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
 
-		expand_upwards(vma, address + PAGE_SIZE);
+		return expand_upwards(vma, address + PAGE_SIZE);
 	}
 	return 0;
 }
@@ -2640,7 +2635,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_SIGBUS;
 
 	/* Use the zero-page for reads */
-	if (!(flags & FAULT_FLAG_WRITE)) {
+	if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
 						vma->vm_page_prot));
 		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
@@ -3008,6 +3003,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (set_page_dirty(fault_page))
 		dirtied = 1;
 
+	/*
+	 * Take a local copy of the address_space - page.mapping may be zeroed
+	 * by truncate after unlock_page().  The address_space itself remains
+	 * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+	 * release semantics to prevent the compiler from undoing this copying.
+	 */
 	mapping = fault_page->mapping;
 	unlock_page(fault_page);
 	if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
@@ -3201,7 +3202,16 @@ static int handle_pte_fault(struct mm_struct *mm,
 	pte_t entry;
 	spinlock_t *ptl;
 
-	entry = ACCESS_ONCE(*pte);
+	/*
+	 * some architectures can have larger ptes than wordsize,
+	 * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
+	 * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
+	 * The code below just needs a consistent view for the ifs and
+	 * we later double check anyway with the ptl lock held. So here
+	 * a barrier will do.
+	 */
+	entry = *pte;
+	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
 			if (vma->vm_ops) {
@@ -3378,6 +3388,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
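
Note on the mmu_gather hunks above: the boolean need_flush is replaced by range tracking, so a non-zero tlb->end now means "there is something to flush", and __tlb_reset_range() (defined in the generic TLB header, not in mm/memory.c, so not visible in this diff) collapses the tracked range back to empty. The sketch below shows one plausible shape of that helper and of the range-growing step that takes over from "tlb->need_flush = 1"; the struct layout, the _sketch-suffixed names, and the exact reset values are illustrative assumptions, not the kernel's actual definitions.

/*
 * Illustrative sketch only: the real struct mmu_gather and its helpers live
 * in include/asm-generic/tlb.h; the fields and names here are assumptions
 * made for this example.
 */
struct mmu_gather_sketch {
	unsigned long start;	/* lowest address gathered so far */
	unsigned long end;	/* one past the highest address; 0 == nothing gathered */
};

/* Make the range empty; tlb_flush_mmu() can then simply test "if (!tlb->end)". */
static inline void __tlb_reset_range_sketch(struct mmu_gather_sketch *tlb)
{
	tlb->start = ~0UL;
	tlb->end = 0;
}

/*
 * Grow the range as pages are gathered. Making end non-zero here is what
 * used to be done by setting need_flush = 1 before this patch.
 */
static inline void __tlb_adjust_range_sketch(struct mmu_gather_sketch *tlb,
					     unsigned long addr,
					     unsigned long size)
{
	if (addr < tlb->start)
		tlb->start = addr;
	if (addr + size > tlb->end)
		tlb->end = addr + size;
}

Read this way, the "if (!tlb->end) return;" test in tlb_flush_mmu() and the VM_BUG_ON(!tlb->end) in __tlb_remove_page() are direct replacements for the old need_flush checks, and because tlb_flush_mmu_tlbonly() now resets the range after flushing, zap_pte_range() no longer has to save and restore tlb->start/tlb->end by hand in its force_flush path.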