diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/fremap.c | 8
-rw-r--r-- | mm/huge_memory.c | 9
-rw-r--r-- | mm/memcontrol.c | 2
-rw-r--r-- | mm/memory-failure.c | 10
-rw-r--r-- | mm/mlock.c | 44
-rw-r--r-- | mm/util.c | 5
6 files changed, 55 insertions, 23 deletions
diff --git a/mm/fremap.c b/mm/fremap.c index 5bff0814776..bbc4d660221 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -208,9 +208,10 @@ get_write_lock: if (mapping_cap_account_dirty(mapping)) { unsigned long addr; struct file *file = get_file(vma->vm_file); + /* mmap_region may free vma; grab the info now */ + vm_flags = vma->vm_flags; - addr = mmap_region(file, start, size, - vma->vm_flags, pgoff); + addr = mmap_region(file, start, size, vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -218,7 +219,7 @@ get_write_lock: BUG_ON(addr != start); err = 0; } - goto out; + goto out_freed; } mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); @@ -253,6 +254,7 @@ get_write_lock: out: if (vma) vm_flags = vma->vm_flags; +out_freed: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7de1bf85f68..95d1acb0f3d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -883,9 +883,6 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_unlock; } - /* mmap_sem prevents this happening but warn if that changes */ - WARN_ON(pmd_trans_migrating(pmd)); - if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ spin_unlock(src_ptl); @@ -1157,7 +1154,7 @@ alloc: new_page = NULL; if (unlikely(!new_page)) { - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); } else { @@ -1184,7 +1181,7 @@ alloc: count_vm_event(THP_FAULT_ALLOC); - if (is_huge_zero_pmd(orig_pmd)) + if (!page) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); @@ -1210,7 +1207,7 @@ alloc: page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); put_huge_zero_page(); } else 
{ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bf5e8945714..7f1a356153c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -338,7 +338,7 @@ struct mem_cgroup { static size_t memcg_size(void) { return sizeof(struct mem_cgroup) + - nr_node_ids * sizeof(struct mem_cgroup_per_node); + nr_node_ids * sizeof(struct mem_cgroup_per_node *); } /* internal only representation about the status of kmem accounting. */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index db08af92c6f..fabe55046c1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -938,6 +938,16 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, BUG_ON(!PageHWPoison(p)); return SWAP_FAIL; } + /* + * We pinned the head page for hwpoison handling, + * now we split the thp and we are interested in + * the hwpoisoned raw page, so move the refcount + * to it. + */ + if (hpage != p) { + put_page(hpage); + get_page(p); + } /* THP is split, so ppage should be the real poisoned page. */ ppage = p; } diff --git a/mm/mlock.c b/mm/mlock.c index d480cd6fc47..192e6eebe4f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked + * @page - page to be unlocked, either a normal page or THP page head + * + * returns the size of the page as a page mask (0 for normal page, + * HPAGE_PMD_NR - 1 for THP head page) * * called from munlock()/munmap() path with page supposedly on the LRU. 
* When we munlock a page, because the vma where we found the page is being @@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int page_mask = 0; + unsigned int nr_pages; BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - unsigned int nr_pages = hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - page_mask = nr_pages - 1; if (!isolate_lru_page(page)) __munlock_isolated_page(page); else __munlock_isolation_failed(page); + } else { + nr_pages = hpage_nr_pages(page); } - return page_mask; + /* + * Regardless of the original PageMlocked flag, we determine nr_pages + * after touching the flag. This leaves a possible race with a THP page + * split, such that a whole THP page was munlocked, but nr_pages == 1. + * Returning a smaller mask due to that is OK, the worst that can + * happen is subsequent useless scanning of the former tail pages. + * The NR_MLOCK accounting can however become broken. + */ + return nr_pages - 1; } /** @@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked = -nr; + int delta_munlocked; struct pagevec pvec_putback; int pgrescued = 0; + pagevec_init(&pvec_putback, 0); + /* Phase 1: page isolation */ spin_lock_irq(&zone->lru_lock); for (i = 0; i < nr; i++) { @@ -318,18 +332,21 @@ skip_munlock: /* * We won't be munlocking this page in the next phase * but we still need to release the follow_page_mask() - * pin. + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release would deadlock. 
*/ + pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; - put_page(page); - delta_munlocked++; } } + delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); + /* Now we can release pins of pages that we are not munlocking */ + pagevec_release(&pvec_putback); + /* Phase 2: page munlock */ - pagevec_init(&pvec_putback, 0); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; @@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page = NULL; - unsigned int page_mask, page_increm; + unsigned int page_mask; + unsigned long page_increm; struct pagevec pvec; struct zone *zone; int zoneid; @@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, goto next; } } - page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + /* It's a bug to munlock in the middle of a THP page */ + VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); + page_increm = 1 + page_mask; start += page_increm * PAGE_SIZE; next: cond_resched(); diff --git a/mm/util.c b/mm/util.c index f7bc2096071..808f375648e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -390,7 +390,10 @@ struct address_space *page_mapping(struct page *page) { struct address_space *mapping = page->mapping; - VM_BUG_ON(PageSlab(page)); + /* This happens if someone calls flush_dcache_page on slab page */ + if (unlikely(PageSlab(page))) + return NULL; + if (unlikely(PageSwapCache(page))) { swp_entry_t entry; |