diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 202 |
1 files changed, 124 insertions, 78 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6e4dd3e1c0..df0694c6ade 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0; int sysctl_memory_failure_recovery __read_mostly = 1; -atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); +atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0); #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE) @@ -784,12 +784,12 @@ static struct page_state { { sc|dirty, sc|dirty, "dirty swapcache", me_swapcache_dirty }, { sc|dirty, sc, "clean swapcache", me_swapcache_clean }, - { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, - { unevict, unevict, "clean unevictable LRU", me_pagecache_clean }, - { mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty }, { mlock, mlock, "clean mlocked LRU", me_pagecache_clean }, + { unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty }, + { unevict, unevict, "clean unevictable LRU", me_pagecache_clean }, + { lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty }, { lru|dirty, lru, "clean LRU", me_pagecache_clean }, @@ -1021,6 +1021,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) struct page *hpage; int res; unsigned int nr_pages; + unsigned long page_flags; if (!sysctl_memory_failure_recovery) panic("Memory failure from trap %d on page %lx", trapno, pfn); @@ -1039,8 +1040,18 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } - nr_pages = 1 << compound_trans_order(hpage); - atomic_long_add(nr_pages, &mce_bad_pages); + /* + * Currently errors on hugetlbfs pages are measured in hugepage units, + * so nr_pages should be 1 << compound_order. OTOH when errors are on + * transparent hugepages, they are supposed to be split and error + * measurement is done in normal page units. So nr_pages should be one + * in this case. + */ + if (PageHuge(p)) + nr_pages = 1 << compound_order(hpage); + else /* normal page or thp */ + nr_pages = 1; + atomic_long_add(nr_pages, &num_poisoned_pages); /* * We need/can do nothing about count=0 pages. @@ -1070,7 +1081,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) if (!PageHWPoison(hpage) || (hwpoison_filter(p) && TestClearPageHWPoison(p)) || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); return 0; } set_page_hwpoison_huge_page(hpage); @@ -1119,6 +1130,15 @@ int memory_failure(unsigned long pfn, int trapno, int flags) lock_page(hpage); /* + * We use page flags to determine what action should be taken, but + * the flags can be modified by the error containment action. One + * example is an mlocked page, where PG_mlocked is cleared by + * page_remove_rmap() in try_to_unmap_one(). So to determine page status + * correctly, we save a copy of the page flags at this time. + */ + page_flags = p->flags; + + /* * unpoison always clear PG_hwpoison inside page lock */ if (!PageHWPoison(p)) { @@ -1128,7 +1148,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); unlock_page(hpage); put_page(hpage); return 0; @@ -1176,12 +1196,19 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } res = -EBUSY; - for (ps = error_states;; ps++) { - if ((p->flags & ps->mask) == ps->res) { - res = page_action(ps, p, pfn); + /* + * The first check uses the current page flags which may not have any + * relevant information. The second check with the saved page flagss is + * carried out only if the first check can't determine the page status. + */ + for (ps = error_states;; ps++) + if ((p->flags & ps->mask) == ps->res) break; - } - } + if (!ps->mask) + for (ps = error_states;; ps++) + if ((page_flags & ps->mask) == ps->res) + break; + res = page_action(ps, p, pfn); out: unlock_page(hpage); return res; @@ -1323,7 +1350,7 @@ int unpoison_memory(unsigned long pfn) return 0; } if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); return 0; } @@ -1337,7 +1364,7 @@ int unpoison_memory(unsigned long pfn) */ if (TestClearPageHWPoison(page)) { pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); - atomic_long_sub(nr_pages, &mce_bad_pages); + atomic_long_sub(nr_pages, &num_poisoned_pages); freeit = 1; if (PageHuge(page)) clear_page_hwpoison_huge_page(page); @@ -1368,7 +1395,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x) * that is not free, and 1 for any other page type. * For 1 the page is returned with increased page count, otherwise not. */ -static int get_any_page(struct page *p, unsigned long pfn, int flags) +static int __get_any_page(struct page *p, unsigned long pfn, int flags) { int ret; @@ -1393,11 +1420,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) if (!get_page_unless_zero(compound_head(p))) { if (PageHuge(p)) { pr_info("%s: %#lx free huge page\n", __func__, pfn); - ret = dequeue_hwpoisoned_huge_page(compound_head(p)); + ret = 0; } else if (is_free_buddy_page(p)) { pr_info("%s: %#lx free buddy page\n", __func__, pfn); - /* Set hwpoison bit while page is still isolated */ - SetPageHWPoison(p); ret = 0; } else { pr_info("%s: %#lx: unknown zero refcount page type %lx\n", @@ -1413,43 +1438,68 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) return ret; } +static int get_any_page(struct page *page, unsigned long pfn, int flags) +{ + int ret = __get_any_page(page, pfn, flags); + + if (ret == 1 && !PageHuge(page) && !PageLRU(page)) { + /* + * Try to free it. + */ + put_page(page); + shake_page(page, 1); + + /* + * Did it turn free? + */ + ret = __get_any_page(page, pfn, 0); + if (!PageLRU(page)) { + pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", + pfn, page->flags); + return -EIO; + } + } + return ret; +} + static int soft_offline_huge_page(struct page *page, int flags) { int ret; unsigned long pfn = page_to_pfn(page); struct page *hpage = compound_head(page); - ret = get_any_page(page, pfn, flags); - if (ret < 0) - return ret; - if (ret == 0) - goto done; - + /* + * This double-check of PageHWPoison is to avoid the race with + * memory_failure(). See also comment in __soft_offline_page(). + */ + lock_page(hpage); if (PageHWPoison(hpage)) { + unlock_page(hpage); put_page(hpage); pr_info("soft offline: %#lx hugepage already poisoned\n", pfn); return -EBUSY; } + unlock_page(hpage); /* Keep page count to indicate a given hugepage is isolated. */ - ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false, + ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, MIGRATE_SYNC); put_page(hpage); if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); - return ret; - } -done: - if (!PageHWPoison(hpage)) + } else { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); atomic_long_add(1 << compound_trans_order(hpage), - &mce_bad_pages); - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); + &num_poisoned_pages); + } /* keep elevated page count for bad page */ return ret; } +static int __soft_offline_page(struct page *page, int flags); + /** * soft_offline_page - Soft offline a page. * @page: page to offline @@ -1478,9 +1528,11 @@ int soft_offline_page(struct page *page, int flags) unsigned long pfn = page_to_pfn(page); struct page *hpage = compound_trans_head(page); - if (PageHuge(page)) - return soft_offline_huge_page(page, flags); - if (PageTransHuge(hpage)) { + if (PageHWPoison(page)) { + pr_info("soft offline: %#lx page already poisoned\n", pfn); + return -EBUSY; + } + if (!PageHuge(page) && PageTransHuge(hpage)) { if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { pr_info("soft offline: %#lx: failed to split THP\n", pfn); @@ -1491,47 +1543,45 @@ int soft_offline_page(struct page *page, int flags) ret = get_any_page(page, pfn, flags); if (ret < 0) return ret; - if (ret == 0) - goto done; - - /* - * Page cache page we can handle? - */ - if (!PageLRU(page)) { - /* - * Try to free it. - */ - put_page(page); - shake_page(page, 1); - - /* - * Did it turn free? - */ - ret = get_any_page(page, pfn, 0); - if (ret < 0) - return ret; - if (ret == 0) - goto done; - } - if (!PageLRU(page)) { - pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", - pfn, page->flags); - return -EIO; + if (ret) { /* for in-use pages */ + if (PageHuge(page)) + ret = soft_offline_huge_page(page, flags); + else + ret = __soft_offline_page(page, flags); + } else { /* for free pages */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_trans_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } } + /* keep elevated page count for bad page */ + return ret; +} - lock_page(page); - wait_on_page_writeback(page); +static int __soft_offline_page(struct page *page, int flags) +{ + int ret; + unsigned long pfn = page_to_pfn(page); /* - * Synchronized using the page lock with memory_failure() + * Check PageHWPoison again inside page lock because PageHWPoison + * is set by memory_failure() outside page lock. Note that + * memory_failure() also double-checks PageHWPoison inside page lock, + * so there's no race between soft_offline_page() and memory_failure(). */ + lock_page(page); + wait_on_page_writeback(page); if (PageHWPoison(page)) { unlock_page(page); put_page(page); pr_info("soft offline: %#lx page already poisoned\n", pfn); return -EBUSY; } - /* * Try to invalidate first. This should work for * non dirty unmapped page cache pages. @@ -1544,9 +1594,10 @@ int soft_offline_page(struct page *page, int flags) */ if (ret == 1) { put_page(page); - ret = 0; pr_info("soft_offline: %#lx: invalidated\n", pfn); - goto done; + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + return 0; } /* @@ -1563,28 +1614,23 @@ int soft_offline_page(struct page *page, int flags) if (!ret) { LIST_HEAD(pagelist); inc_zone_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + page_is_file_cache(page)); list_add(&page->lru, &pagelist); ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, - false, MIGRATE_SYNC, - MR_MEMORY_FAILURE); + MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { putback_lru_pages(&pagelist); pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); if (ret > 0) ret = -EIO; + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); } } else { pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", pfn, ret, page_count(page), page->flags); } - if (ret) - return ret; - -done: - atomic_long_add(1, &mce_bad_pages); - SetPageHWPoison(page); - /* keep elevated page count for bad page */ return ret; } |