diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 181 |
1 files changed, 113 insertions, 68 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index bb940045fe8..482a33d8913 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -36,6 +36,7 @@ #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> #include <linux/balloon_compaction.h> +#include <linux/mmu_notifier.h> #include <asm/tlbflush.h> @@ -71,28 +72,12 @@ int migrate_prep_local(void) } /* - * Add isolated pages on the list back to the LRU under page lock - * to avoid leaking evictable pages back onto unevictable list. - */ -void putback_lru_pages(struct list_head *l) -{ - struct page *page; - struct page *page2; - - list_for_each_entry_safe(page, page2, l, lru) { - list_del(&page->lru); - dec_zone_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); - putback_lru_page(page); - } -} - -/* * Put previously isolated pages back onto the appropriate lists * from where they were once taken off for compaction/migration. * - * This function shall be used instead of putback_lru_pages(), - * whenever the isolated pageset has been built by isolate_migratepages_range() + * This function shall be used whenever the isolated pageset has been + * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() + * and isolate_huge_page(). */ void putback_movable_pages(struct list_head *l) { @@ -198,7 +183,12 @@ out: */ static void remove_migration_ptes(struct page *old, struct page *new) { - rmap_walk(new, remove_migration_pte, old); + struct rmap_walk_control rwc = { + .rmap_one = remove_migration_pte, + .arg = old, + }; + + rmap_walk(new, &rwc); } /* @@ -316,14 +306,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, */ int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, - struct buffer_head *head, enum migrate_mode mode) + struct buffer_head *head, enum migrate_mode mode, + int extra_count) { - int expected_count = 0; + int expected_count = 1 + extra_count; void **pslot; if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != 1) + if (page_count(page) != expected_count) return -EAGAIN; return MIGRATEPAGE_SUCCESS; } @@ -333,7 +324,7 @@ int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page)); - expected_count = 2 + page_has_private(page); + expected_count += 1 + page_has_private(page); if (page_count(page) != expected_count || radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { spin_unlock_irq(&mapping->tree_lock); @@ -508,7 +499,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageUptodate(page)) SetPageUptodate(newpage); if (TestClearPageActive(page)) { - VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON_PAGE(PageUnevictable(page), page); SetPageActive(newpage); } else if (TestClearPageUnevictable(page)) SetPageUnevictable(newpage); @@ -561,14 +552,6 @@ void migrate_page_copy(struct page *newpage, struct page *page) * Migration functions ***********************************************************/ -/* Always fail migration. Used for mappings that are not movable */ -int fail_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) -{ - return -EIO; -} -EXPORT_SYMBOL(fail_migrate_page); - /* * Common logic to directly migrate a single page suitable for * pages that do not use PagePrivate/PagePrivate2. @@ -583,7 +566,7 @@ int migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -610,7 +593,7 @@ int buffer_migrate_page(struct address_space *mapping, head = page_buffers(page); - rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); + rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -888,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * free the metadata, so the page can be freed. */ if (!page->mapping) { - VM_BUG_ON(PageAnon(page)); + VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); goto uncharge; @@ -1006,7 +989,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, { int rc = 0; int *result = NULL; - struct page *new_hpage = get_new_page(hpage, private, &result); + struct page *new_hpage; struct anon_vma *anon_vma = NULL; /* @@ -1016,9 +999,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, * tables or check whether the hugepage is pmd-based or not before * kicking migration. */ - if (!hugepage_migration_support(page_hstate(hpage))) + if (!hugepage_migration_support(page_hstate(hpage))) { + putback_active_hugepage(hpage); return -ENOSYS; + } + new_hpage = get_new_page(hpage, private, &result); if (!new_hpage) return -ENOMEM; @@ -1118,7 +1104,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, nr_succeeded++; break; default: - /* Permanent failure */ + /* + * Permanent failure (-EBUSY, -ENOSYS, etc.): + * unlike -EAGAIN case, the failed page is + * removed from migration page list and not + * retried in the next outer loop. + */ nr_failed++; break; } @@ -1557,8 +1548,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page, __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & ~GFP_IOFS, 0); - if (newpage) - page_cpupid_xchg_last(newpage, page_cpupid_last(page)); return newpage; } @@ -1592,35 +1581,42 @@ bool migrate_ratelimited(int node) } /* Returns true if the node is migrate rate-limited after the update */ -bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages) +static bool numamigrate_update_ratelimit(pg_data_t *pgdat, + unsigned long nr_pages) { - bool rate_limited = false; - /* * Rate-limit the amount of data that is being migrated to a node. * Optimal placement is no good if the memory bus is saturated and * all the time is being spent migrating! */ - spin_lock(&pgdat->numabalancing_migrate_lock); if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) { + spin_lock(&pgdat->numabalancing_migrate_lock); pgdat->numabalancing_migrate_nr_pages = 0; pgdat->numabalancing_migrate_next_window = jiffies + msecs_to_jiffies(migrate_interval_millisecs); + spin_unlock(&pgdat->numabalancing_migrate_lock); } - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) - rate_limited = true; - else - pgdat->numabalancing_migrate_nr_pages += nr_pages; - spin_unlock(&pgdat->numabalancing_migrate_lock); - - return rate_limited; + if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { + trace_mm_numa_migrate_ratelimit(current, pgdat->node_id, + nr_pages); + return true; + } + + /* + * This is an unlocked non-atomic update so errors are possible. + * The consequences are failing to migrate when we potentiall should + * have which is not severe enough to warrant locking. If it is ever + * a problem, it can be converted to a per-cpu counter. + */ + pgdat->numabalancing_migrate_nr_pages += nr_pages; + return false; } -int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) +static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; - VM_BUG_ON(compound_order(page) && !PageTransHuge(page)); + VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page); /* Avoid migrating to a node that is nearly full */ if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) @@ -1654,6 +1650,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) return 1; } +bool pmd_trans_migrating(pmd_t pmd) +{ + struct page *page = pmd_page(pmd); + return PageLocked(page); +} + +void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) +{ + struct page *page = pmd_page(*pmd); + wait_on_page_locked(page); +} + /* * Attempt to migrate a misplaced page to the specified destination * node. Caller is expected to have an elevated reference count on @@ -1691,7 +1699,12 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, node, MIGRATE_ASYNC, MR_NUMA_MISPLACED); if (nr_remaining) { - putback_lru_pages(&migratepages); + if (!list_empty(&migratepages)) { + list_del(&page->lru); + dec_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + putback_lru_page(page); + } isolated = 0; } else count_vm_numa_event(NUMA_PAGE_MIGRATE); @@ -1716,12 +1729,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, struct page *page, int node) { spinlock_t *ptl; - unsigned long haddr = address & HPAGE_PMD_MASK; pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; struct mem_cgroup *memcg = NULL; int page_lru = page_is_file_cache(page); + unsigned long mmun_start = address & HPAGE_PMD_MASK; + unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; + pmd_t orig_entry; /* * Rate-limit the amount of data that is being migrated to a node. @@ -1736,14 +1751,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, if (!new_page) goto out_fail; - page_cpupid_xchg_last(new_page, page_cpupid_last(page)); - isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) { put_page(new_page); goto out_fail; } + if (mm_tlb_flush_pending(mm)) + flush_tlb_range(vma, mmun_start, mmun_end); + /* Prepare a page as a migration target */ __set_page_locked(new_page); SetPageSwapBacked(new_page); @@ -1755,9 +1771,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, WARN_ON(PageLRU(new_page)); /* Recheck the target PMD */ + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); ptl = pmd_lock(mm, pmd); - if (unlikely(!pmd_same(*pmd, entry))) { + if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) { +fail_putback: spin_unlock(ptl); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); /* Reverse changes made by migrate_page_copy() */ if (TestClearPageActive(new_page)) @@ -1774,7 +1793,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, putback_lru_page(page); mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); - goto out_fail; + + goto out_unlock; } /* @@ -1786,16 +1806,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, */ mem_cgroup_prepare_migration(page, new_page, &memcg); + orig_entry = *pmd; entry = mk_pmd(new_page, vma->vm_page_prot); - entry = pmd_mknonnuma(entry); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = pmd_mkhuge(entry); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - pmdp_clear_flush(vma, haddr, pmd); - set_pmd_at(mm, haddr, pmd, entry); - page_add_new_anon_rmap(new_page, vma, haddr); + /* + * Clear the old entry under pagetable lock and establish the new PTE. + * Any parallel GUP will either observe the old page blocking on the + * page lock, block on the page table lock or observe the new page. + * The SetPageUptodate on the new page and page_add_new_anon_rmap + * guarantee the copy is visible before the pagetable update. + */ + flush_cache_range(vma, mmun_start, mmun_end); + page_add_new_anon_rmap(new_page, vma, mmun_start); + pmdp_clear_flush(vma, mmun_start, pmd); + set_pmd_at(mm, mmun_start, pmd, entry); + flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); + + if (page_count(page) != 2) { + set_pmd_at(mm, mmun_start, pmd, orig_entry); + flush_tlb_range(vma, mmun_start, mmun_end); + update_mmu_cache_pmd(vma, address, &entry); + page_remove_rmap(new_page); + goto fail_putback; + } + page_remove_rmap(page); + /* * Finish the charge transaction under the page table lock to * prevent split_huge_page() from dividing up the charge @@ -1803,6 +1842,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, */ mem_cgroup_end_migration(memcg, page, new_page, true); spin_unlock(ptl); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(new_page); unlock_page(page); @@ -1820,10 +1860,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); out_dropref: - entry = pmd_mknonnuma(entry); - set_pmd_at(mm, haddr, pmd, entry); - update_mmu_cache_pmd(vma, address, &entry); + ptl = pmd_lock(mm, pmd); + if (pmd_same(*pmd, entry)) { + entry = pmd_mknonnuma(entry); + set_pmd_at(mm, mmun_start, pmd, entry); + update_mmu_cache_pmd(vma, address, &entry); + } + spin_unlock(ptl); +out_unlock: unlock_page(page); put_page(page); return 0; |