Diffstat (limited to 'mm/huge_memory.c')
 mm/huge_memory.c | 142
 1 file changed, 92 insertions(+), 50 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 004c9c2aac7..470dcda10ad 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf,
enum transparent_hugepage_flag flag)
{
- if (test_bit(flag, &transparent_hugepage_flags))
- return sprintf(buf, "[yes] no\n");
- else
- return sprintf(buf, "yes [no]\n");
+ return sprintf(buf, "%d\n",
+ !!test_bit(flag, &transparent_hugepage_flags));
}
+
static ssize_t single_flag_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count,
enum transparent_hugepage_flag flag)
{
- if (!memcmp("yes", buf,
- min(sizeof("yes")-1, count))) {
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &value);
+ if (ret < 0)
+ return ret;
+ if (value > 1)
+ return -EINVAL;
+
+ if (value)
set_bit(flag, &transparent_hugepage_flags);
- } else if (!memcmp("no", buf,
- min(sizeof("no")-1, count))) {
+ else
clear_bit(flag, &transparent_hugepage_flags);
- } else
- return -EINVAL;
return count;
}
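
The rewritten store hook replaces the old prefix match on "yes"/"no" with a strict numeric parse: kstrtoul() must consume the whole string (a single trailing newline is tolerated), and only the values 0 and 1 pass validation, so inputs like "yes", "2", or "1x" now get -EINVAL. A minimal userspace sketch of the same validation, with strtoul standing in for the kernel's kstrtoul (not kernel code):

    /*
     * Userspace sketch of the strict 0/1 parse used above; strtoul
     * stands in for the kernel's kstrtoul, which likewise rejects
     * anything after the digits other than a final newline.
     */
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int parse_flag(const char *buf)
    {
            char *end;
            unsigned long value;

            errno = 0;
            value = strtoul(buf, &end, 10);
            if (errno || end == buf)
                    return -EINVAL;         /* no digits at all */
            if (*end != '\0' && strcmp(end, "\n") != 0)
                    return -EINVAL;         /* trailing junk */
            if (value > 1)
                    return -EINVAL;         /* only 0 and 1 are meaningful */
            return (int)value;
    }

    int main(void)
    {
            const char *inputs[] = { "1\n", "0", "yes", "2", "1x" };

            for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++)
                    printf("input %zu -> %d\n", i, parse_flag(inputs[i]));
            return 0;
    }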
@@ -643,23 +647,24 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
return ret;
}
-static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
{
- return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+ return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
}
static inline struct page *alloc_hugepage_vma(int defrag,
struct vm_area_struct *vma,
- unsigned long haddr)
+ unsigned long haddr, int nd,
+ gfp_t extra_gfp)
{
- return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
- HPAGE_PMD_ORDER, vma, haddr);
+ return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
+ HPAGE_PMD_ORDER, vma, haddr, nd);
}
#ifndef CONFIG_NUMA
static inline struct page *alloc_hugepage(int defrag)
{
- return alloc_pages(alloc_hugepage_gfpmask(defrag),
+ return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
HPAGE_PMD_ORDER);
}
#endif
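
The gfp helper now builds the whole mask in one expression: start from GFP_TRANSHUGE, clear __GFP_WAIT when defrag is disabled (so the huge page allocation will not sleep in reclaim/compaction), then OR in any caller-supplied extra bits such as __GFP_OTHER_NODE. A toy demonstration of that bit arithmetic, using made-up flag values rather than the real GFP constants:

    /*
     * Toy demo of the gfp-mask composition above; the flag values are
     * illustrative stand-ins, not the kernel's real GFP bits.
     */
    #include <stdio.h>

    #define FAKE_GFP_WAIT      0x01u  /* stand-in for __GFP_WAIT */
    #define FAKE_GFP_TRANSHUGE 0x0fu  /* stand-in; includes the WAIT bit */
    #define FAKE_GFP_EXTRA     0x10u  /* stand-in for e.g. __GFP_OTHER_NODE */

    static unsigned int gfpmask(int defrag, unsigned int extra_gfp)
    {
            /* same shape as alloc_hugepage_gfpmask() above */
            return (FAKE_GFP_TRANSHUGE & ~(defrag ? 0u : FAKE_GFP_WAIT)) | extra_gfp;
    }

    int main(void)
    {
            printf("defrag on : %#x\n", gfpmask(1, 0));              /* 0xf: may block */
            printf("defrag off: %#x\n", gfpmask(0, 0));              /* 0xe: WAIT cleared */
            printf("with extra: %#x\n", gfpmask(0, FAKE_GFP_EXTRA)); /* 0x1e */
            return 0;
    }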
@@ -678,9 +683,12 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr);
- if (unlikely(!page))
+ vma, haddr, numa_node_id(), 0);
+ if (unlikely(!page)) {
+ count_vm_event(THP_FAULT_FALLBACK);
goto out;
+ }
+ count_vm_event(THP_FAULT_ALLOC);
if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
put_page(page);
goto out;
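
THP_FAULT_ALLOC and THP_FAULT_FALLBACK are vm_event counters; together with the THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, and THP_SPLIT events added further down, they are exported through /proc/vmstat under lowercase names (thp_fault_alloc and so on). A small reader that prints just those lines:

    /*
     * Print the thp_* event counters from /proc/vmstat; these names are
     * exported by kernels that carry the THP vm_events in this patch.
     */
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[128];
            FILE *f = fopen("/proc/vmstat", "r");

            if (!f) {
                    perror("/proc/vmstat");
                    return 1;
            }
            while (fgets(line, sizeof(line), f))
                    if (strncmp(line, "thp_", 4) == 0)
                            fputs(line, stdout);
            fclose(f);
            return 0;
    }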
@@ -799,8 +807,9 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
- pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
- vma, address);
+ pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
+ __GFP_OTHER_NODE,
+ vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
mem_cgroup_newpage_charge(pages[i], mm,
GFP_KERNEL))) {
@@ -902,16 +911,18 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow())
new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr);
+ vma, haddr, numa_node_id(), 0);
else
new_page = NULL;
if (unlikely(!new_page)) {
+ count_vm_event(THP_FAULT_FALLBACK);
ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
pmd, orig_pmd, page, haddr);
put_page(page);
goto out;
}
+ count_vm_event(THP_FAULT_ALLOC);
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
put_page(new_page);
@@ -1162,7 +1173,12 @@ static void __split_huge_page_refcount(struct page *page)
/* after clearing PageTail the gup refcount can be released */
smp_mb();
- page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+ /*
+ * Retain the hwpoison flag of a poisoned tail page; otherwise
+ * memory-failure handling can kill the wrong process on a KVM
+ * guest after the split.
+ */
+ page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
page_tail->flags |= (page->flags &
((1L << PG_referenced) |
(1L << PG_swapbacked) |
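
One subtlety in the hwpoison line above: ~ binds tighter than |, so the mask is (~PAGE_FLAGS_CHECK_AT_PREP) | __PG_HWPOISON. Every flag normally cleared when a page is prepared is cleared on the tail page too, except that a poisoned tail keeps its hwpoison bit across the split. A toy check of that arithmetic, with illustrative bit values in place of the real page-flag masks:

    /*
     * Toy check of the mask arithmetic: clear every "check at prep" bit
     * except the poison bit. Bit values are illustrative, not the
     * kernel's real page-flag masks.
     */
    #include <assert.h>
    #include <stdio.h>

    #define FAKE_CHECK_AT_PREP 0xffu  /* stand-in for PAGE_FLAGS_CHECK_AT_PREP */
    #define FAKE_HWPOISON      0x10u  /* stand-in for __PG_HWPOISON (a bit inside the set) */

    int main(void)
    {
            unsigned int flags = 0x3fu;  /* several prep-checked bits set, poison included */
            unsigned int keep  = ~FAKE_CHECK_AT_PREP | FAKE_HWPOISON;  /* ~ binds before | */

            flags &= keep;
            assert(flags == FAKE_HWPOISON);  /* only the poison bit survived */
            printf("surviving flags: %#x\n", flags);
            return 0;
    }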
@@ -1203,6 +1219,8 @@ static void __split_huge_page_refcount(struct page *page)
BUG_ON(!PageDirty(page_tail));
BUG_ON(!PageSwapBacked(page_tail));
+ mem_cgroup_split_huge_fixup(page, page_tail);
+
lru_add_page_tail(zone, page, page_tail);
}
@@ -1381,6 +1399,7 @@ int split_huge_page(struct page *page)
BUG_ON(!PageSwapBacked(page));
__split_huge_page(page, anon_vma);
+ count_vm_event(THP_SPLIT);
BUG_ON(PageCompound(page));
out_unlock:
@@ -1738,7 +1757,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma,
+ int node)
{
pgd_t *pgd;
pud_t *pud;
@@ -1754,6 +1774,10 @@ static void collapse_huge_page(struct mm_struct *mm,
#ifndef CONFIG_NUMA
VM_BUG_ON(!*hpage);
new_page = *hpage;
+ if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+ up_read(&mm->mmap_sem);
+ return;
+ }
#else
VM_BUG_ON(*hpage);
/*
@@ -1766,18 +1790,21 @@ static void collapse_huge_page(struct mm_struct *mm,
* mmap_sem in read mode is good idea also to allow greater
* scalability.
*/
- new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+ new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+ node, __GFP_OTHER_NODE);
if (unlikely(!new_page)) {
up_read(&mm->mmap_sem);
+ count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
return;
}
-#endif
+ count_vm_event(THP_COLLAPSE_ALLOC);
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
up_read(&mm->mmap_sem);
put_page(new_page);
return;
}
+#endif
/* after allocating the hugepage upgrade to mmap_sem write mode */
up_read(&mm->mmap_sem);
@@ -1804,6 +1831,8 @@ static void collapse_huge_page(struct mm_struct *mm,
/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
goto out;
+ if (is_vma_temporary_stack(vma))
+ goto out;
VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
pgd = pgd_offset(mm, address);
@@ -1837,15 +1866,14 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte);
spin_unlock(ptl);
- pte_unmap(pte);
if (unlikely(!isolated)) {
+ pte_unmap(pte);
spin_lock(&mm->page_table_lock);
BUG_ON(!pmd_none(*pmd));
set_pmd_at(mm, address, pmd, _pmd);
spin_unlock(&mm->page_table_lock);
anon_vma_unlock(vma->anon_vma);
- mem_cgroup_uncharge_page(new_page);
goto out;
}
@@ -1856,6 +1884,7 @@ static void collapse_huge_page(struct mm_struct *mm,
anon_vma_unlock(vma->anon_vma);
__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+ pte_unmap(pte);
__SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd);
VM_BUG_ON(page_count(pgtable) != 1);
@@ -1890,6 +1919,7 @@ out_up_write:
return;
out:
+ mem_cgroup_uncharge_page(new_page);
#ifdef CONFIG_NUMA
put_page(new_page);
#endif
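
Moving mem_cgroup_uncharge_page() to the common out: label means every failure path taken after the charge succeeds unwinds through a single point, instead of each path (such as the !isolated case above, which previously uncharged inline) carrying its own copy. This is the standard kernel goto-unwind idiom; a self-contained userspace sketch of the shape, with placeholder mallocs rather than kernel APIs:

    /*
     * Self-contained sketch of the goto-based unwind idiom; the
     * resources here are placeholder mallocs, not kernel APIs.
     */
    #include <stdio.h>
    #include <stdlib.h>

    static int do_work(int fail_step)
    {
            char *a, *b = NULL;
            int ret = -1;

            a = malloc(16);
            if (!a || fail_step == 1)
                    goto out;               /* nothing beyond 'a' to undo yet */

            b = malloc(16);
            if (!b || fail_step == 2)
                    goto out_free;          /* one unwind point, like 'out:' above */

            /* ... the actual work would go here ... */
            ret = 0;
    out_free:
            free(b);                        /* free(NULL) is a defined no-op */
    out:
            free(a);
            return ret;
    }

    int main(void)
    {
            for (int step = 0; step <= 2; step++)
                    printf("fail_step=%d -> %d\n", step, do_work(step));
            return 0;
    }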
@@ -1909,6 +1939,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
struct page *page;
unsigned long _address;
spinlock_t *ptl;
+ int node = -1;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
@@ -1939,6 +1970,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page))
goto out_unmap;
+ /*
+ * Choose the node of the first page. This could
+ * be more sophisticated and look at more pages,
+ * but isn't for now.
+ */
+ if (node == -1)
+ node = page_to_nid(page);
VM_BUG_ON(PageCompound(page));
if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
goto out_unmap;
@@ -1955,7 +1993,7 @@ out_unmap:
pte_unmap_unlock(pte, ptl);
if (ret)
/* collapse_huge_page will return with the mmap_sem released */
- collapse_huge_page(mm, address, hpage, vma);
+ collapse_huge_page(mm, address, hpage, vma, node);
out:
return ret;
}
@@ -2024,32 +2062,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
if ((!(vma->vm_flags & VM_HUGEPAGE) &&
!khugepaged_always()) ||
(vma->vm_flags & VM_NOHUGEPAGE)) {
+ skip:
progress++;
continue;
}
-
/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
- if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
- khugepaged_scan.address = vma->vm_end;
- progress++;
- continue;
- }
+ if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+ goto skip;
+ if (is_vma_temporary_stack(vma))
+ goto skip;
+
VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
- if (hstart >= hend) {
- progress++;
- continue;
- }
+ if (hstart >= hend)
+ goto skip;
+ if (khugepaged_scan.address > hend)
+ goto skip;
if (khugepaged_scan.address < hstart)
khugepaged_scan.address = hstart;
- if (khugepaged_scan.address > hend) {
- khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
- progress++;
- continue;
- }
- BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
while (khugepaged_scan.address < hend) {
int ret;
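
Note the placement of skip:: the label is declared inside the first if block, yet the later goto skip statements jump to it from outside that block. C labels have function scope, so this is legal, and it lets every reject case (no anon_vma, temporary stack, empty huge-aligned range, address past the end) share one progress++; continue; tail. A minimal illustration of the scoping rule:

    /*
     * Minimal demo of the scoping rule: a label declared inside one
     * block can be the target of a goto from elsewhere in the same
     * function.
     */
    #include <stdio.h>

    int main(void)
    {
            int skipped = 0;

            for (int i = 0; i < 10; i++) {
                    if (i % 2 == 0) {
    skip:
                            skipped++;
                            continue;       /* shared tail for all rejects */
                    }
                    if (i % 3 == 0)
                            goto skip;      /* jumps into the if block above */
                    printf("processed %d\n", i);
            }
            printf("skipped %d\n", skipped);
            return 0;
    }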
@@ -2078,7 +2111,7 @@ breakouterloop:
breakouterloop_mmap_sem:
spin_lock(&khugepaged_mm_lock);
- BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+ VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
/*
* Release the current mm_slot if this mm is about to die, or
* if we scanned all vmas of this mm.
@@ -2130,8 +2163,11 @@ static void khugepaged_do_scan(struct page **hpage)
#ifndef CONFIG_NUMA
if (!*hpage) {
*hpage = alloc_hugepage(khugepaged_defrag());
- if (unlikely(!*hpage))
+ if (unlikely(!*hpage)) {
+ count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
break;
+ }
+ count_vm_event(THP_COLLAPSE_ALLOC);
}
#else
if (IS_ERR(*hpage))
@@ -2171,8 +2207,11 @@ static struct page *khugepaged_alloc_hugepage(void)
do {
hpage = alloc_hugepage(khugepaged_defrag());
- if (!hpage)
+ if (!hpage) {
+ count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
khugepaged_alloc_sleep();
+ } else
+ count_vm_event(THP_COLLAPSE_ALLOC);
} while (unlikely(!hpage) &&
likely(khugepaged_enabled()));
return hpage;
@@ -2189,8 +2228,11 @@ static void khugepaged_loop(void)
while (likely(khugepaged_enabled())) {
#ifndef CONFIG_NUMA
hpage = khugepaged_alloc_hugepage();
- if (unlikely(!hpage))
+ if (unlikely(!hpage)) {
+ count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
break;
+ }
+ count_vm_event(THP_COLLAPSE_ALLOC);
#else
if (IS_ERR(hpage)) {
khugepaged_alloc_sleep();
@@ -2233,9 +2275,9 @@ static int khugepaged(void *none)
for (;;) {
mutex_unlock(&khugepaged_mutex);
- BUG_ON(khugepaged_thread != current);
+ VM_BUG_ON(khugepaged_thread != current);
khugepaged_loop();
- BUG_ON(khugepaged_thread != current);
+ VM_BUG_ON(khugepaged_thread != current);
mutex_lock(&khugepaged_mutex);
if (!khugepaged_enabled())
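
The BUG_ON() -> VM_BUG_ON() conversions in this patch (here and in khugepaged_scan_mm_slot() above) downgrade sanity checks on khugepaged's own invariants: VM_BUG_ON() expands to BUG_ON() only under CONFIG_DEBUG_VM and compiles to nothing otherwise, so production kernels no longer pay for, or panic on, these assertions. A runnable demo of the pattern, with a stand-in BUG_ON (the kernel's real definition lives in include/linux/mmdebug.h):

    /*
     * Runnable demo of the VM_BUG_ON pattern; BUG_ON here is a stand-in
     * that aborts, and the #ifdef mirrors the shape of the kernel's
     * definition in include/linux/mmdebug.h.
     */
    #include <stdio.h>
    #include <stdlib.h>

    #define BUG_ON(cond) do { \
            if (cond) { \
                    fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__); \
                    abort(); \
            } \
    } while (0)

    #ifdef CONFIG_DEBUG_VM
    #define VM_BUG_ON(cond) BUG_ON(cond)
    #else
    #define VM_BUG_ON(cond) do { } while (0)  /* compiled out; cond not evaluated */
    #endif

    int main(void)
    {
            VM_BUG_ON(1 == 1);  /* fires only when built with -DCONFIG_DEBUG_VM */
            puts("assertion compiled away");
            return 0;
    }

Compiled plainly this prints the message; compiled with -DCONFIG_DEBUG_VM the same source aborts at the assertion.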