diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 124 |
1 files changed, 99 insertions, 25 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 67a71191136..6058b53dcb8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> +#include <linux/seq_file.h> #include <linux/sysctl.h> #include <linux/highmem.h> #include <linux/mmu_notifier.h> @@ -262,7 +263,7 @@ struct resv_map { struct list_head regions; }; -struct resv_map *resv_map_alloc(void) +static struct resv_map *resv_map_alloc(void) { struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL); if (!resv_map) @@ -274,7 +275,7 @@ struct resv_map *resv_map_alloc(void) return resv_map; } -void resv_map_release(struct kref *ref) +static void resv_map_release(struct kref *ref) { struct resv_map *resv_map = container_of(ref, struct resv_map, refs); @@ -289,7 +290,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return (struct resv_map *)(get_vma_private_data(vma) & ~HPAGE_RESV_MASK); - return 0; + return NULL; } static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) @@ -353,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma) return 0; } +static void clear_gigantic_page(struct page *page, + unsigned long addr, unsigned long sz) +{ + int i; + struct page *p = page; + + might_sleep(); + for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { + cond_resched(); + clear_user_highpage(p, addr + i * PAGE_SIZE); + } +} static void clear_huge_page(struct page *page, unsigned long addr, unsigned long sz) { int i; + if (unlikely(sz > MAX_ORDER_NR_PAGES)) + return clear_gigantic_page(page, addr, sz); + might_sleep(); for (i = 0; i < sz/PAGE_SIZE; i++) { cond_resched(); @@ -365,12 +381,32 @@ static void clear_huge_page(struct page *page, } } +static void copy_gigantic_page(struct page *dst, struct page *src, + unsigned long addr, struct vm_area_struct *vma) +{ + int i; + struct hstate *h = hstate_vma(vma); + struct page *dst_base = dst; + struct page *src_base = src; + might_sleep(); + for (i = 0; i < pages_per_huge_page(h); ) { + cond_resched(); + copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); + if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) + return copy_gigantic_page(dst, src, addr, vma); + might_sleep(); for (i = 0; i < pages_per_huge_page(h); i++) { cond_resched(); @@ -455,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) { int i; + VM_BUG_ON(h->order >= MAX_ORDER); + h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { @@ -969,6 +1007,14 @@ found: return 1; } +static void prep_compound_huge_page(struct page *page, int order) +{ + if (unlikely(order > (MAX_ORDER - 1))) + prep_compound_gigantic_page(page, order); + else + prep_compound_page(page, order); +} + /* Put bootmem huge pages into the standard lists after mem_map is up */ static void __init gather_bootmem_prealloc(void) { @@ -979,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void) struct hstate *h = m->hstate; __ClearPageReserved(page); WARN_ON(page_count(page) != 1); - prep_compound_page(page, h->order); + prep_compound_huge_page(page, h->order); prep_new_huge_page(h, page, page_to_nid(page)); } } @@ -1455,15 +1501,15 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, #endif /* CONFIG_SYSCTL */ -int hugetlb_report_meminfo(char *buf) +void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h = &default_hstate; - return sprintf(buf, - "HugePages_Total: %5lu\n" - "HugePages_Free: %5lu\n" - "HugePages_Rsvd: %5lu\n" - "HugePages_Surp: %5lu\n" - "Hugepagesize: %5lu kB\n", + seq_printf(m, + "HugePages_Total: %5lu\n" + "HugePages_Free: %5lu\n" + "HugePages_Rsvd: %5lu\n" + "HugePages_Surp: %5lu\n" + "Hugepagesize: %8lu kB\n", h->nr_huge_pages, h->free_huge_pages, h->resv_huge_pages, @@ -1747,11 +1793,10 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, * from other VMAs and let the children be SIGKILLed if they are faulting the * same region. */ -int unmap_ref_private(struct mm_struct *mm, - struct vm_area_struct *vma, - struct page *page, - unsigned long address) +static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, + struct page *page, unsigned long address) { + struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; struct address_space *mapping; struct prio_tree_iter iter; @@ -1761,7 +1806,7 @@ int unmap_ref_private(struct mm_struct *mm, * vm_pgoff is in PAGE_SIZE units, hence the different calculation * from page cache lookup which is in HPAGE_SIZE units. */ - address = address & huge_page_mask(hstate_vma(vma)); + address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + (vma->vm_pgoff >> PAGE_SHIFT); mapping = (struct address_space *)page_private(page); @@ -1780,7 +1825,7 @@ int unmap_ref_private(struct mm_struct *mm, */ if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) unmap_hugepage_range(iter_vma, - address, address + HPAGE_SIZE, + address, address + huge_page_size(h), page); } @@ -2008,7 +2053,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, entry = huge_ptep_get(ptep); if (huge_pte_none(entry)) { ret = hugetlb_no_page(mm, vma, address, ptep, write_access); - goto out_unlock; + goto out_mutex; } ret = 0; @@ -2024,7 +2069,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (write_access && !pte_write(entry)) { if (vma_needs_reservation(h, vma, address) < 0) { ret = VM_FAULT_OOM; - goto out_unlock; + goto out_mutex; } if (!(vma->vm_flags & VM_SHARED)) @@ -2034,10 +2079,23 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, spin_lock(&mm->page_table_lock); /* Check for a racing update before calling hugetlb_cow */ - if (likely(pte_same(entry, huge_ptep_get(ptep)))) - if (write_access && !pte_write(entry)) + if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) + goto out_page_table_lock; + + + if (write_access) { + if (!pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, pagecache_page); + goto out_page_table_lock; + } + entry = pte_mkdirty(entry); + } + entry = pte_mkyoung(entry); + if (huge_ptep_set_access_flags(vma, address, ptep, entry, write_access)) + update_mmu_cache(vma, address, entry); + +out_page_table_lock: spin_unlock(&mm->page_table_lock); if (pagecache_page) { @@ -2045,7 +2103,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(pagecache_page); } -out_unlock: +out_mutex: mutex_unlock(&hugetlb_instantiation_mutex); return ret; @@ -2060,6 +2118,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return NULL; } +static int huge_zeropage_ok(pte_t *ptep, int write, int shared) +{ + if (!ptep || write || shared) + return 0; + else + return huge_pte_none(huge_ptep_get(ptep)); +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, @@ -2069,6 +2135,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr = *position; int remainder = *length; struct hstate *h = hstate_vma(vma); + int zeropage_ok = 0; + int shared = vma->vm_flags & VM_SHARED; spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { @@ -2081,8 +2149,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * first, for the page indexing below to work. */ pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); + if (huge_zeropage_ok(pte, write, shared)) + zeropage_ok = 1; - if (!pte || huge_pte_none(huge_ptep_get(pte)) || + if (!pte || + (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) || (write && !pte_write(huge_ptep_get(pte)))) { int ret; @@ -2102,8 +2173,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, page = pte_page(huge_ptep_get(pte)); same_page: if (pages) { - get_page(page); - pages[i] = page + pfn_offset; + if (zeropage_ok) + pages[i] = ZERO_PAGE(0); + else + pages[i] = mem_map_offset(page, pfn_offset); + get_page(pages[i]); } if (vmas) |