diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 7 | ||||
-rw-r--r-- | mm/memcontrol.c | 14 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 9 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 79 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/pagewalk.c | 70 |
7 files changed, 101 insertions, 82 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 03a89a2f464..362c329b83f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2325,7 +2325,12 @@ static void collapse_huge_page(struct mm_struct *mm, pte_unmap(pte); spin_lock(&mm->page_table_lock); BUG_ON(!pmd_none(*pmd)); - set_pmd_at(mm, address, pmd, _pmd); + /* + * We can only use set_pmd_at when establishing + * hugepmds and never for establishing regular pmds that + * points to regular pagetables. Use pmd_populate for that + */ + pmd_populate(mm, pmd, pmd_pgtable(_pmd)); spin_unlock(&mm->page_table_lock); anon_vma_unlock_write(vma->anon_vma); goto out; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cb1c9dedf9b..010d6c14129 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4108,8 +4108,6 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, if (mem_cgroup_disabled()) return NULL; - VM_BUG_ON(PageSwapCache(page)); - if (PageTransHuge(page)) { nr_pages <<= compound_order(page); VM_BUG_ON(!PageTransHuge(page)); @@ -4205,6 +4203,18 @@ void mem_cgroup_uncharge_page(struct page *page) if (page_mapped(page)) return; VM_BUG_ON(page->mapping && !PageAnon(page)); + /* + * If the page is in swap cache, uncharge should be deferred + * to the swap path, which also properly accounts swap usage + * and handles memcg lifetime. + * + * Note that this check is not stable and reclaim may add the + * page to swap cache at any time after this. However, if the + * page is not in swap cache by the time page->mapcount hits + * 0, there won't be any page table references to the swap + * slot, and reclaim will free it and not actually write the + * page to disk. + */ if (PageSwapCache(page)) return; __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a221fac1f47..1ad92b46753 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -720,9 +720,12 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, start = phys_start_pfn << PAGE_SHIFT; size = nr_pages * PAGE_SIZE; ret = release_mem_region_adjustable(&iomem_resource, start, size); - if (ret) - pr_warn("Unable to release resource <%016llx-%016llx> (%d)\n", - start, start + size - 1, ret); + if (ret) { + resource_size_t endres = start + size - 1; + + pr_warn("Unable to release resource <%pa-%pa> (%d)\n", + &start, &endres, ret); + } sections_to_remove = nr_pages / PAGES_PER_SECTION; for (i = 0; i < sections_to_remove; i++) { diff --git a/mm/migrate.c b/mm/migrate.c index 27ed22579fd..b1f57501de9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -165,7 +165,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, pte = arch_make_huge_pte(pte, vma, new, 0); } #endif - flush_cache_page(vma, addr, pte_pfn(pte)); + flush_dcache_page(new); set_pte_at(mm, addr, ptep, pte); if (PageHuge(new)) { diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index be04122fb27..6725ff18337 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm) int id; /* - * srcu_read_lock() here will block synchronize_srcu() in - * mmu_notifier_unregister() until all registered - * ->release() callouts this function makes have - * returned. + * SRCU here will block mmu_notifier_unregister until + * ->release returns. */ id = srcu_read_lock(&srcu); + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) + /* + * If ->release runs before mmu_notifier_unregister it must be + * handled, as it's the only way for the driver to flush all + * existing sptes and stop the driver from establishing any more + * sptes before all the pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + srcu_read_unlock(&srcu, id); + spin_lock(&mm->mmu_notifier_mm->lock); while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { mn = hlist_entry(mm->mmu_notifier_mm->list.first, struct mmu_notifier, hlist); - /* - * Unlink. This will prevent mmu_notifier_unregister() - * from also making the ->release() callout. + * We arrived before mmu_notifier_unregister so + * mmu_notifier_unregister will do nothing other than to wait + * for ->release to finish and for mmu_notifier_unregister to + * return. */ hlist_del_init_rcu(&mn->hlist); - spin_unlock(&mm->mmu_notifier_mm->lock); - - /* - * Clear sptes. (see 'release' description in mmu_notifier.h) - */ - if (mn->ops->release) - mn->ops->release(mn, mm); - - spin_lock(&mm->mmu_notifier_mm->lock); } spin_unlock(&mm->mmu_notifier_mm->lock); /* - * All callouts to ->release() which we have done are complete. - * Allow synchronize_srcu() in mmu_notifier_unregister() to complete - */ - srcu_read_unlock(&srcu, id); - - /* - * mmu_notifier_unregister() may have unlinked a notifier and may - * still be calling out to it. Additionally, other notifiers - * may have been active via vmtruncate() et. al. Block here - * to ensure that all notifier callouts for this mm have been - * completed and the sptes are really cleaned up before returning - * to exit_mmap(). + * synchronize_srcu here prevents mmu_notifier_release from returning to + * exit_mmap (which would proceed with freeing all pages in the mm) + * until the ->release method returns, if it was invoked by + * mmu_notifier_unregister. + * + * The mmu_notifier_mm can't go away from under us because one mm_count + * is held by exit_mmap. */ synchronize_srcu(&srcu); } @@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_count) <= 0); - spin_lock(&mm->mmu_notifier_mm->lock); if (!hlist_unhashed(&mn->hlist)) { + /* + * SRCU here will force exit_mmap to wait for ->release to + * finish before freeing the pages. + */ int id; + id = srcu_read_lock(&srcu); /* - * Ensure we synchronize up with __mmu_notifier_release(). + * exit_mmap will block in mmu_notifier_release to guarantee + * that ->release is called before freeing the pages. */ - id = srcu_read_lock(&srcu); - - hlist_del_rcu(&mn->hlist); - spin_unlock(&mm->mmu_notifier_mm->lock); - if (mn->ops->release) mn->ops->release(mn, mm); + srcu_read_unlock(&srcu, id); + spin_lock(&mm->mmu_notifier_mm->lock); /* - * Allow __mmu_notifier_release() to complete. + * Can not use list_del_rcu() since __mmu_notifier_release + * can delete it before we hold the lock. */ - srcu_read_unlock(&srcu, id); - } else + hlist_del_init_rcu(&mn->hlist); spin_unlock(&mm->mmu_notifier_mm->lock); + } /* - * Wait for any running method to finish, including ->release() if it - * was run by __mmu_notifier_release() instead of us. + * Wait for any running method to finish, of course including + * ->release if it was run by mmu_notifier_relase instead of us. */ synchronize_srcu(&srcu); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 98cbdf6e553..378a15bcd64 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5158,7 +5158,7 @@ unsigned long free_reserved_area(unsigned long start, unsigned long end, for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) { if (poison) memset((void *)pos, poison, PAGE_SIZE); - free_reserved_page(virt_to_page(pos)); + free_reserved_page(virt_to_page((void *)pos)); } if (pages && s) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 35aa294656c..5da2cbcfdbb 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, return 0; } -static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) -{ - struct vm_area_struct *vma; - - /* We don't need vma lookup at all. */ - if (!walk->hugetlb_entry) - return NULL; - - VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); - vma = find_vma(walk->mm, addr); - if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma)) - return vma; - - return NULL; -} - #else /* CONFIG_HUGETLB_PAGE */ -static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk) -{ - return NULL; -} - static int walk_hugetlb_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end, if (!walk->mm) return -EINVAL; + VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); + pgd = pgd_offset(walk->mm, addr); do { - struct vm_area_struct *vma; + struct vm_area_struct *vma = NULL; next = pgd_addr_end(addr, end); /* - * handle hugetlb vma individually because pagetable walk for - * the hugetlb page is dependent on the architecture and - * we can't handled it in the same manner as non-huge pages. + * This function was not intended to be vma based. + * But there are vma special cases to be handled: + * - hugetlb vma's + * - VM_PFNMAP vma's */ - vma = hugetlb_vma(addr, walk); + vma = find_vma(walk->mm, addr); if (vma) { - if (vma->vm_end < next) + /* + * There are no page structures backing a VM_PFNMAP + * range, so do not allow split_huge_page_pmd(). + */ + if ((vma->vm_start <= addr) && + (vma->vm_flags & VM_PFNMAP)) { next = vma->vm_end; + pgd = pgd_offset(walk->mm, next); + continue; + } /* - * Hugepage is very tightly coupled with vma, so - * walk through hugetlb entries within a given vma. + * Handle hugetlb vma individually because pagetable + * walk for the hugetlb page is dependent on the + * architecture and we can't handled it in the same + * manner as non-huge pages. */ - err = walk_hugetlb_range(vma, addr, next, walk); - if (err) - break; - pgd = pgd_offset(walk->mm, next); - continue; + if (walk->hugetlb_entry && (vma->vm_start <= addr) && + is_vm_hugetlb_page(vma)) { + if (vma->vm_end < next) + next = vma->vm_end; + /* + * Hugepage is very tightly coupled with vma, + * so walk through hugetlb entries within a + * given vma. + */ + err = walk_hugetlb_range(vma, addr, next, walk); + if (err) + break; + pgd = pgd_offset(walk->mm, next); + continue; + } } if (pgd_none_or_clear_bad(pgd)) { |