summaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c95
1 files changed, 61 insertions, 34 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index d9d42316a99..3e8491c504f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -103,6 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
* LOCK should suffice since the actual taking of the lock must
* happen _before_ what follows.
*/
+ might_sleep();
if (rwsem_is_locked(&anon_vma->root->rwsem)) {
anon_vma_lock_write(anon_vma);
anon_vma_unlock_write(anon_vma);
@@ -426,8 +427,9 @@ struct anon_vma *page_get_anon_vma(struct page *page)
* above cannot corrupt).
*/
if (!page_mapped(page)) {
+ rcu_read_unlock();
put_anon_vma(anon_vma);
- anon_vma = NULL;
+ return NULL;
}
out:
rcu_read_unlock();
@@ -477,9 +479,9 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)
}
if (!page_mapped(page)) {
+ rcu_read_unlock();
put_anon_vma(anon_vma);
- anon_vma = NULL;
- goto out;
+ return NULL;
}
/* we pinned the anon_vma, its safe to sleep */
@@ -515,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- pgoff = page->index << huge_page_order(page_hstate(page));
-
+ pgoff_t pgoff = page_to_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
@@ -567,6 +565,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd = NULL;
+ pmd_t pmde;
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
@@ -577,7 +576,13 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
goto out;
pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
+ /*
+ * Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at()
+ * without holding anon_vma lock for write. So when looking for a
+ * genuine pmde (in which to find pte), test present and !THP together.
+ */
+ pmde = ACCESS_ONCE(*pmd);
+ if (!pmd_present(pmde) || pmd_trans_huge(pmde))
pmd = NULL;
out:
return pmd;
@@ -613,9 +618,6 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
if (!pmd)
return NULL;
- if (pmd_trans_huge(*pmd))
- return NULL;
-
pte = pte_offset_map(pmd, address);
/* Make a quick check before getting the lock */
if (!sync && !pte_present(*pte)) {
@@ -669,7 +671,7 @@ struct page_referenced_arg {
/*
* arg: page_referenced_arg will be passed
*/
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
@@ -986,6 +988,12 @@ void do_page_add_anon_rmap(struct page *page,
{
int first = atomic_inc_and_test(&page->_mapcount);
if (first) {
+ /*
+ * We use the irq-unsafe __{inc|mod}_zone_page_state because
+ * these counters are not modified in interrupt context, and
+ * the pte lock (a spinlock) is held, which implies preemption
+ * is disabled.
+ */
if (PageTransHuge(page))
__inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1024,11 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1);
- if (!mlocked_vma_newpage(vma, page)) {
- SetPageActive(page);
- lru_cache_add(page);
- } else
- add_page_to_unevictable_list(page);
}
/**
@@ -1077,11 +1080,15 @@ void page_remove_rmap(struct page *page)
/*
* Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
* and not charged by memcg for now.
+ *
+ * We use the irq-unsafe __{inc|mod}_zone_page_state because
+ * these counters are not modified in interrupt context, and
+ * the pte lock (a spinlock) is held, which implies that
+ * preemption is disabled.
*/
if (unlikely(PageHuge(page)))
goto out;
if (anon) {
- mem_cgroup_uncharge_page(page);
if (PageTransHuge(page))
__dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1112,7 +1119,7 @@ out:
/*
* @arg: enum ttu_flags will be passed to this argument
*/
-int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
{
struct mm_struct *mm = vma->vm_mm;
@@ -1135,7 +1142,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKED)
goto out_mlock;
- if (TTU_ACTION(flags) == TTU_MUNLOCK)
+ if (flags & TTU_MUNLOCK)
goto out_unmap;
}
if (!(flags & TTU_IGNORE_ACCESS)) {
@@ -1165,6 +1172,16 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
set_pte_at(mm, address, pte,
swp_entry_to_pte(make_hwpoison_entry(page)));
+ } else if (pte_unused(pteval)) {
+ /*
+ * The guest indicated that the page content is of no
+ * interest anymore. Simply discard the pte, vmscan
+ * will take care of the rest.
+ */
+ if (PageAnon(page))
+ dec_mm_counter(mm, MM_ANONPAGES);
+ else
+ dec_mm_counter(mm, MM_FILEPAGES);
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
pte_t swp_pte;
@@ -1193,7 +1210,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
- BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
+ BUG_ON(!(flags & TTU_MIGRATION));
entry = make_migration_entry(page, pte_write(pteval));
}
swp_pte = swp_entry_to_pte(entry);
@@ -1202,7 +1219,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
set_pte_at(mm, address, pte, swp_pte);
BUG_ON(pte_file(*pte));
} else if (IS_ENABLED(CONFIG_MIGRATION) &&
- (TTU_ACTION(flags) == TTU_MIGRATION)) {
+ (flags & TTU_MIGRATION)) {
/* Establish migration entry for a file page */
swp_entry_t entry;
entry = make_migration_entry(page, pte_write(pteval));
@@ -1215,7 +1232,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
out_unmap:
pte_unmap_unlock(pte, ptl);
- if (ret != SWAP_FAIL)
+ if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
mmu_notifier_invalidate_page(mm, address);
out:
return ret;
@@ -1322,9 +1339,19 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
BUG_ON(!page || PageAnon(page));
if (locked_vma) {
- mlock_vma_page(page); /* no-op if already mlocked */
- if (page == check_page)
+ if (page == check_page) {
+ /* we know we have check_page locked */
+ mlock_vma_page(page);
ret = SWAP_MLOCK;
+ } else if (trylock_page(page)) {
+ /*
+ * If we can lock the page, perform mlock.
+ * Otherwise leave the page alone, it will be
+ * eventually encountered again later.
+ */
+ mlock_vma_page(page);
+ unlock_page(page);
+ }
continue; /* don't unmap */
}
@@ -1339,7 +1366,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
if (page->index != linear_page_index(vma, address)) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(pteval))
- pte_file_mksoft_dirty(ptfile);
+ ptfile = pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, address, pte, ptfile);
}
@@ -1360,8 +1387,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
}
static int try_to_unmap_nonlinear(struct page *page,
- struct address_space *mapping, struct vm_area_struct *vma)
+ struct address_space *mapping, void *arg)
{
+ struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
unsigned long cursor;
unsigned long max_nl_cursor = 0;
@@ -1491,7 +1519,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
* locking requirements of exec(), migration skips
* temporary VMAs until after exec() completes.
*/
- if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+ if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page))
rwc.invalid_vma = invalid_migration_vma;
ret = rmap_walk(page, &rwc);
@@ -1543,10 +1571,9 @@ void __put_anon_vma(struct anon_vma *anon_vma)
{
struct anon_vma *root = anon_vma->root;
+ anon_vma_free(anon_vma);
if (root != anon_vma && atomic_dec_and_test(&root->refcount))
anon_vma_free(root);
-
- anon_vma_free(anon_vma);
}
static struct anon_vma *rmap_walk_anon_lock(struct page *page,
@@ -1588,7 +1615,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_to_pgoff(page);
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
@@ -1629,7 +1656,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
{
struct address_space *mapping = page->mapping;
- pgoff_t pgoff = page->index << compound_order(page);
+ pgoff_t pgoff = page_to_pgoff(page);
struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
@@ -1663,7 +1690,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
if (list_empty(&mapping->i_mmap_nonlinear))
goto done;
- ret = rwc->file_nonlinear(page, mapping, vma);
+ ret = rwc->file_nonlinear(page, mapping, rwc->arg);
done:
mutex_unlock(&mapping->i_mmap_mutex);