diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 878 |
1 files changed, 556 insertions, 322 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c01cb9fedb1..eeceeeb0901 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -13,6 +13,7 @@ #include <linux/nodemask.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> +#include <linux/compiler.h> #include <linux/cpuset.h> #include <linux/mutex.h> #include <linux/bootmem.h> @@ -22,6 +23,7 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/page-isolation.h> +#include <linux/jhash.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -33,7 +35,6 @@ #include <linux/node.h> #include "internal.h" -const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; unsigned long hugepages_treat_as_movable; int hugetlb_max_hstate __read_mostly; @@ -53,6 +54,13 @@ static unsigned long __initdata default_hstate_size; */ DEFINE_SPINLOCK(hugetlb_lock); +/* + * Serializes faults on the same logical page. This is used to + * prevent spurious OOMs when the hugepage pool is fully utilized. + */ +static int num_fault_mutexes; +static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp; + static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) { bool free = (spool->count == 0) && (spool->used_hpages == 0); @@ -135,15 +143,8 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) * Region tracking -- allows tracking of reservations and instantiated pages * across the pages in a mapping. * - * The region data structures are protected by a combination of the mmap_sem - * and the hugetlb_instantiation_mutex. To access or modify a region the caller - * must either hold the mmap_sem for write, or the mmap_sem for read and - * the hugetlb_instantiation_mutex: - * - * down_write(&mm->mmap_sem); - * or - * down_read(&mm->mmap_sem); - * mutex_lock(&hugetlb_instantiation_mutex); + * The region data structures are embedded into a resv_map and + * protected by a resv_map's lock */ struct file_region { struct list_head link; @@ -151,10 +152,12 @@ struct file_region { long to; }; -static long region_add(struct list_head *head, long f, long t) +static long region_add(struct resv_map *resv, long f, long t) { + struct list_head *head = &resv->regions; struct file_region *rg, *nrg, *trg; + spin_lock(&resv->lock); /* Locate the region we are either in or before. */ list_for_each_entry(rg, head, link) if (f <= rg->to) @@ -184,14 +187,18 @@ static long region_add(struct list_head *head, long f, long t) } nrg->from = f; nrg->to = t; + spin_unlock(&resv->lock); return 0; } -static long region_chg(struct list_head *head, long f, long t) +static long region_chg(struct resv_map *resv, long f, long t) { - struct file_region *rg, *nrg; + struct list_head *head = &resv->regions; + struct file_region *rg, *nrg = NULL; long chg = 0; +retry: + spin_lock(&resv->lock); /* Locate the region we are before or in. */ list_for_each_entry(rg, head, link) if (f <= rg->to) @@ -201,15 +208,21 @@ static long region_chg(struct list_head *head, long f, long t) * Subtle, allocate a new region at the position but make it zero * size such that we can guarantee to record the reservation. */ if (&rg->link == head || t < rg->from) { - nrg = kmalloc(sizeof(*nrg), GFP_KERNEL); - if (!nrg) - return -ENOMEM; - nrg->from = f; - nrg->to = f; - INIT_LIST_HEAD(&nrg->link); - list_add(&nrg->link, rg->link.prev); + if (!nrg) { + spin_unlock(&resv->lock); + nrg = kmalloc(sizeof(*nrg), GFP_KERNEL); + if (!nrg) + return -ENOMEM; + + nrg->from = f; + nrg->to = f; + INIT_LIST_HEAD(&nrg->link); + goto retry; + } - return t - f; + list_add(&nrg->link, rg->link.prev); + chg = t - f; + goto out_nrg; } /* Round our left edge to the current segment if it encloses us. */ @@ -222,7 +235,7 @@ static long region_chg(struct list_head *head, long f, long t) if (&rg->link == head) break; if (rg->from > t) - return chg; + goto out; /* We overlap with this area, if it extends further than * us then we must extend ourselves. Account for its @@ -233,20 +246,30 @@ static long region_chg(struct list_head *head, long f, long t) } chg -= rg->to - rg->from; } + +out: + spin_unlock(&resv->lock); + /* We already know we raced and no longer need the new region */ + kfree(nrg); + return chg; +out_nrg: + spin_unlock(&resv->lock); return chg; } -static long region_truncate(struct list_head *head, long end) +static long region_truncate(struct resv_map *resv, long end) { + struct list_head *head = &resv->regions; struct file_region *rg, *trg; long chg = 0; + spin_lock(&resv->lock); /* Locate the region we are either in or before. */ list_for_each_entry(rg, head, link) if (end <= rg->to) break; if (&rg->link == head) - return 0; + goto out; /* If we are in the middle of a region then adjust it. */ if (end > rg->from) { @@ -263,14 +286,19 @@ static long region_truncate(struct list_head *head, long end) list_del(&rg->link); kfree(rg); } + +out: + spin_unlock(&resv->lock); return chg; } -static long region_count(struct list_head *head, long f, long t) +static long region_count(struct resv_map *resv, long f, long t) { + struct list_head *head = &resv->regions; struct file_region *rg; long chg = 0; + spin_lock(&resv->lock); /* Locate each segment we overlap with, and count that overlap. */ list_for_each_entry(rg, head, link) { long seg_from; @@ -286,6 +314,7 @@ static long region_count(struct list_head *head, long f, long t) chg += seg_to - seg_from; } + spin_unlock(&resv->lock); return chg; } @@ -376,39 +405,46 @@ static void set_vma_private_data(struct vm_area_struct *vma, vma->vm_private_data = (void *)value; } -struct resv_map { - struct kref refs; - struct list_head regions; -}; - -static struct resv_map *resv_map_alloc(void) +struct resv_map *resv_map_alloc(void) { struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL); if (!resv_map) return NULL; kref_init(&resv_map->refs); + spin_lock_init(&resv_map->lock); INIT_LIST_HEAD(&resv_map->regions); return resv_map; } -static void resv_map_release(struct kref *ref) +void resv_map_release(struct kref *ref) { struct resv_map *resv_map = container_of(ref, struct resv_map, refs); /* Clear out any active regions before we release the map. */ - region_truncate(&resv_map->regions, 0); + region_truncate(resv_map, 0); kfree(resv_map); } +static inline struct resv_map *inode_resv_map(struct inode *inode) +{ + return inode->i_mapping->private_data; +} + static struct resv_map *vma_resv_map(struct vm_area_struct *vma) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - if (!(vma->vm_flags & VM_MAYSHARE)) + if (vma->vm_flags & VM_MAYSHARE) { + struct address_space *mapping = vma->vm_file->f_mapping; + struct inode *inode = mapping->host; + + return inode_resv_map(inode); + + } else { return (struct resv_map *)(get_vma_private_data(vma) & ~HPAGE_RESV_MASK); - return NULL; + } } static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) @@ -507,7 +543,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { - if (hugepages_treat_as_movable || hugepage_migration_support(h)) + if (hugepages_treat_as_movable || hugepage_migration_supported(h)) return GFP_HIGHUSER_MOVABLE; else return GFP_HIGHUSER; @@ -540,7 +576,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, goto err; retry_cpuset: - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); zonelist = huge_zonelist(vma, address, htlb_alloc_mask(h), &mpol, &nodemask); @@ -562,7 +598,7 @@ retry_cpuset: } mpol_cond_put(mpol); - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; @@ -570,25 +606,242 @@ err: return NULL; } +/* + * common helper functions for hstate_next_node_to_{alloc|free}. + * We may have allocated or freed a huge page based on a different + * nodes_allowed previously, so h->next_node_to_{alloc|free} might + * be outside of *nodes_allowed. Ensure that we use an allowed + * node for alloc or free. + */ +static int next_node_allowed(int nid, nodemask_t *nodes_allowed) +{ + nid = next_node(nid, *nodes_allowed); + if (nid == MAX_NUMNODES) + nid = first_node(*nodes_allowed); + VM_BUG_ON(nid >= MAX_NUMNODES); + + return nid; +} + +static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed) +{ + if (!node_isset(nid, *nodes_allowed)) + nid = next_node_allowed(nid, nodes_allowed); + return nid; +} + +/* + * returns the previously saved node ["this node"] from which to + * allocate a persistent huge page for the pool and advance the + * next node from which to allocate, handling wrap at end of node + * mask. + */ +static int hstate_next_node_to_alloc(struct hstate *h, + nodemask_t *nodes_allowed) +{ + int nid; + + VM_BUG_ON(!nodes_allowed); + + nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed); + h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed); + + return nid; +} + +/* + * helper for free_pool_huge_page() - return the previously saved + * node ["this node"] from which to free a huge page. Advance the + * next node id whether or not we find a free huge page to free so + * that the next attempt to free addresses the next node. + */ +static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) +{ + int nid; + + VM_BUG_ON(!nodes_allowed); + + nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); + h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); + + return nid; +} + +#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \ + for (nr_nodes = nodes_weight(*mask); \ + nr_nodes > 0 && \ + ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \ + nr_nodes--) + +#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ + for (nr_nodes = nodes_weight(*mask); \ + nr_nodes > 0 && \ + ((node = hstate_next_node_to_free(hs, mask)) || 1); \ + nr_nodes--) + +#if defined(CONFIG_CMA) && defined(CONFIG_X86_64) +static void destroy_compound_gigantic_page(struct page *page, + unsigned long order) +{ + int i; + int nr_pages = 1 << order; + struct page *p = page + 1; + + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { + __ClearPageTail(p); + set_page_refcounted(p); + p->first_page = NULL; + } + + set_compound_order(page, 0); + __ClearPageHead(page); +} + +static void free_gigantic_page(struct page *page, unsigned order) +{ + free_contig_range(page_to_pfn(page), 1 << order); +} + +static int __alloc_gigantic_page(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long end_pfn = start_pfn + nr_pages; + return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE); +} + +static bool pfn_range_valid_gigantic(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long i, end_pfn = start_pfn + nr_pages; + struct page *page; + + for (i = start_pfn; i < end_pfn; i++) { + if (!pfn_valid(i)) + return false; + + page = pfn_to_page(i); + + if (PageReserved(page)) + return false; + + if (page_count(page) > 0) + return false; + + if (PageHuge(page)) + return false; + } + + return true; +} + +static bool zone_spans_last_pfn(const struct zone *zone, + unsigned long start_pfn, unsigned long nr_pages) +{ + unsigned long last_pfn = start_pfn + nr_pages - 1; + return zone_spans_pfn(zone, last_pfn); +} + +static struct page *alloc_gigantic_page(int nid, unsigned order) +{ + unsigned long nr_pages = 1 << order; + unsigned long ret, pfn, flags; + struct zone *z; + + z = NODE_DATA(nid)->node_zones; + for (; z - NODE_DATA(nid)->node_zones < MAX_NR_ZONES; z++) { + spin_lock_irqsave(&z->lock, flags); + + pfn = ALIGN(z->zone_start_pfn, nr_pages); + while (zone_spans_last_pfn(z, pfn, nr_pages)) { + if (pfn_range_valid_gigantic(pfn, nr_pages)) { + /* + * We release the zone lock here because + * alloc_contig_range() will also lock the zone + * at some point. If there's an allocation + * spinning on this lock, it may win the race + * and cause alloc_contig_range() to fail... + */ + spin_unlock_irqrestore(&z->lock, flags); + ret = __alloc_gigantic_page(pfn, nr_pages); + if (!ret) + return pfn_to_page(pfn); + spin_lock_irqsave(&z->lock, flags); + } + pfn += nr_pages; + } + + spin_unlock_irqrestore(&z->lock, flags); + } + + return NULL; +} + +static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); +static void prep_compound_gigantic_page(struct page *page, unsigned long order); + +static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) +{ + struct page *page; + + page = alloc_gigantic_page(nid, huge_page_order(h)); + if (page) { + prep_compound_gigantic_page(page, huge_page_order(h)); + prep_new_huge_page(h, page, nid); + } + + return page; +} + +static int alloc_fresh_gigantic_page(struct hstate *h, + nodemask_t *nodes_allowed) +{ + struct page *page = NULL; + int nr_nodes, node; + + for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { + page = alloc_fresh_gigantic_page_node(h, node); + if (page) + return 1; + } + + return 0; +} + +static inline bool gigantic_page_supported(void) { return true; } +#else +static inline bool gigantic_page_supported(void) { return false; } +static inline void free_gigantic_page(struct page *page, unsigned order) { } +static inline void destroy_compound_gigantic_page(struct page *page, + unsigned long order) { } +static inline int alloc_fresh_gigantic_page(struct hstate *h, + nodemask_t *nodes_allowed) { return 0; } +#endif + static void update_and_free_page(struct hstate *h, struct page *page) { int i; - VM_BUG_ON(h->order >= MAX_ORDER); + if (hstate_is_gigantic(h) && !gigantic_page_supported()) + return; h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | - 1 << PG_active | 1 << PG_reserved | - 1 << PG_private | 1 << PG_writeback); + 1 << PG_active | 1 << PG_private | + 1 << PG_writeback); } VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); set_compound_page_dtor(page, NULL); set_page_refcounted(page); - arch_release_hugepage(page); - __free_pages(page, huge_page_order(h)); + if (hstate_is_gigantic(h)) { + destroy_compound_gigantic_page(page, huge_page_order(h)); + free_gigantic_page(page, huge_page_order(h)); + } else { + arch_release_hugepage(page); + __free_pages(page, huge_page_order(h)); + } } struct hstate *size_to_hstate(unsigned long size) @@ -602,7 +855,7 @@ struct hstate *size_to_hstate(unsigned long size) return NULL; } -static void free_huge_page(struct page *page) +void free_huge_page(struct page *page) { /* * Can't pass hstate in here because it is called from the @@ -627,7 +880,7 @@ static void free_huge_page(struct page *page) if (restore_reserve) h->resv_huge_pages++; - if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) { + if (h->surplus_huge_pages_node[nid]) { /* remove the page from active list */ list_del(&page->lru); update_and_free_page(h, page); @@ -731,9 +984,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) { struct page *page; - if (h->order >= MAX_ORDER) - return NULL; - page = alloc_pages_exact_node(nid, htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, @@ -749,79 +999,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) return page; } -/* - * common helper functions for hstate_next_node_to_{alloc|free}. - * We may have allocated or freed a huge page based on a different - * nodes_allowed previously, so h->next_node_to_{alloc|free} might - * be outside of *nodes_allowed. Ensure that we use an allowed - * node for alloc or free. - */ -static int next_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - nid = next_node(nid, *nodes_allowed); - if (nid == MAX_NUMNODES) - nid = first_node(*nodes_allowed); - VM_BUG_ON(nid >= MAX_NUMNODES); - - return nid; -} - -static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - if (!node_isset(nid, *nodes_allowed)) - nid = next_node_allowed(nid, nodes_allowed); - return nid; -} - -/* - * returns the previously saved node ["this node"] from which to - * allocate a persistent huge page for the pool and advance the - * next node from which to allocate, handling wrap at end of node - * mask. - */ -static int hstate_next_node_to_alloc(struct hstate *h, - nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed); - h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -/* - * helper for free_pool_huge_page() - return the previously saved - * node ["this node"] from which to free a huge page. Advance the - * next node id whether or not we find a free huge page to free so - * that the next attempt to free addresses the next node. - */ -static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); - h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_alloc(hs, mask)) || 1); \ - nr_nodes--) - -#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_free(hs, mask)) || 1); \ - nr_nodes--) - static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed) { struct page *page; @@ -911,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) unsigned long pfn; struct hstate *h; + if (!hugepages_supported()) + return; + /* Set scan step to minimum hugepage size */ for_each_hstate(h) if (order > huge_page_order(h)) @@ -925,7 +1105,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) struct page *page; unsigned int r_nid; - if (h->order >= MAX_ORDER) + if (hstate_is_gigantic(h)) return NULL; /* @@ -1118,7 +1298,7 @@ static void return_unused_surplus_pages(struct hstate *h, h->resv_huge_pages -= unused_resv_pages; /* Cannot return gigantic pages currently */ - if (h->order >= MAX_ORDER) + if (hstate_is_gigantic(h)) return; nr_pages = min(unused_resv_pages, h->surplus_huge_pages); @@ -1134,6 +1314,7 @@ static void return_unused_surplus_pages(struct hstate *h, while (nr_pages--) { if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) break; + cond_resched_lock(&hugetlb_lock); } } @@ -1150,45 +1331,34 @@ static void return_unused_surplus_pages(struct hstate *h, static long vma_needs_reservation(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { - struct address_space *mapping = vma->vm_file->f_mapping; - struct inode *inode = mapping->host; - - if (vma->vm_flags & VM_MAYSHARE) { - pgoff_t idx = vma_hugecache_offset(h, vma, addr); - return region_chg(&inode->i_mapping->private_list, - idx, idx + 1); + struct resv_map *resv; + pgoff_t idx; + long chg; - } else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { + resv = vma_resv_map(vma); + if (!resv) return 1; - } else { - long err; - pgoff_t idx = vma_hugecache_offset(h, vma, addr); - struct resv_map *resv = vma_resv_map(vma); + idx = vma_hugecache_offset(h, vma, addr); + chg = region_chg(resv, idx, idx + 1); - err = region_chg(&resv->regions, idx, idx + 1); - if (err < 0) - return err; - return 0; - } + if (vma->vm_flags & VM_MAYSHARE) + return chg; + else + return chg < 0 ? chg : 0; } static void vma_commit_reservation(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { - struct address_space *mapping = vma->vm_file->f_mapping; - struct inode *inode = mapping->host; - - if (vma->vm_flags & VM_MAYSHARE) { - pgoff_t idx = vma_hugecache_offset(h, vma, addr); - region_add(&inode->i_mapping->private_list, idx, idx + 1); + struct resv_map *resv; + pgoff_t idx; - } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { - pgoff_t idx = vma_hugecache_offset(h, vma, addr); - struct resv_map *resv = vma_resv_map(vma); + resv = vma_resv_map(vma); + if (!resv) + return; - /* Mark this page used in the map. */ - region_add(&resv->regions, idx, idx + 1); - } + idx = vma_hugecache_offset(h, vma, addr); + region_add(resv, idx, idx + 1); } static struct page *alloc_huge_page(struct vm_area_struct *vma, @@ -1218,24 +1388,17 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg); - if (ret) { - if (chg || avoid_reserve) - hugepage_subpool_put_pages(spool, 1); - return ERR_PTR(-ENOSPC); - } + if (ret) + goto out_subpool_put; + spin_lock(&hugetlb_lock); page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg); if (!page) { spin_unlock(&hugetlb_lock); page = alloc_buddy_huge_page(h, NUMA_NO_NODE); - if (!page) { - hugetlb_cgroup_uncharge_cgroup(idx, - pages_per_huge_page(h), - h_cg); - if (chg || avoid_reserve) - hugepage_subpool_put_pages(spool, 1); - return ERR_PTR(-ENOSPC); - } + if (!page) + goto out_uncharge_cgroup; + spin_lock(&hugetlb_lock); list_move(&page->lru, &h->hugepage_activelist); /* Fall through */ @@ -1247,6 +1410,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, vma_commit_reservation(h, vma, addr); return page; + +out_uncharge_cgroup: + hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg); +out_subpool_put: + if (chg || avoid_reserve) + hugepage_subpool_put_pages(spool, 1); + return ERR_PTR(-ENOSPC); } /* @@ -1294,7 +1464,7 @@ found: return 1; } -static void prep_compound_huge_page(struct page *page, int order) +static void __init prep_compound_huge_page(struct page *page, int order) { if (unlikely(order > (MAX_ORDER - 1))) prep_compound_gigantic_page(page, order); @@ -1328,7 +1498,7 @@ static void __init gather_bootmem_prealloc(void) * fix confusing memory reports from free(1) and another * side-effects, like CommitLimit going negative. */ - if (h->order > (MAX_ORDER - 1)) + if (hstate_is_gigantic(h)) adjust_managed_page_count(page, 1 << h->order); } } @@ -1338,7 +1508,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) unsigned long i; for (i = 0; i < h->max_huge_pages; ++i) { - if (h->order >= MAX_ORDER) { + if (hstate_is_gigantic(h)) { if (!alloc_bootmem_huge_page(h)) break; } else if (!alloc_fresh_huge_page(h, @@ -1354,7 +1524,7 @@ static void __init hugetlb_init_hstates(void) for_each_hstate(h) { /* oversize hugepages were init'ed in early boot */ - if (h->order < MAX_ORDER) + if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); } } @@ -1388,7 +1558,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count, { int i; - if (h->order >= MAX_ORDER) + if (hstate_is_gigantic(h)) return; for_each_node_mask(i, *nodes_allowed) { @@ -1451,7 +1621,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, { unsigned long min_count, ret; - if (h->order >= MAX_ORDER) + if (hstate_is_gigantic(h) && !gigantic_page_supported()) return h->max_huge_pages; /* @@ -1478,7 +1648,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, * and reducing the surplus. */ spin_unlock(&hugetlb_lock); - ret = alloc_fresh_huge_page(h, nodes_allowed); + if (hstate_is_gigantic(h)) + ret = alloc_fresh_gigantic_page(h, nodes_allowed); + else + ret = alloc_fresh_huge_page(h, nodes_allowed); spin_lock(&hugetlb_lock); if (!ret) goto out; @@ -1509,6 +1682,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, while (min_count < persistent_huge_pages(h)) { if (!free_pool_huge_page(h, nodes_allowed, 0)) break; + cond_resched_lock(&hugetlb_lock); } while (count < persistent_huge_pages(h)) { if (!adjust_pool_surplus(h, nodes_allowed, 1)) @@ -1562,22 +1736,14 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj, return sprintf(buf, "%lu\n", nr_huge_pages); } -static ssize_t nr_hugepages_store_common(bool obey_mempolicy, - struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t len) +static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, + struct hstate *h, int nid, + unsigned long count, size_t len) { int err; - int nid; - unsigned long count; - struct hstate *h; NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY); - err = kstrtoul(buf, 10, &count); - if (err) - goto out; - - h = kobj_to_hstate(kobj, &nid); - if (h->order >= MAX_ORDER) { + if (hstate_is_gigantic(h) && !gigantic_page_supported()) { err = -EINVAL; goto out; } @@ -1612,6 +1778,23 @@ out: return err; } +static ssize_t nr_hugepages_store_common(bool obey_mempolicy, + struct kobject *kobj, const char *buf, + size_t len) +{ + struct hstate *h; + unsigned long count; + int nid; + int err; + + err = kstrtoul(buf, 10, &count); + if (err) + return err; + + h = kobj_to_hstate(kobj, &nid); + return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len); +} + static ssize_t nr_hugepages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -1621,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj, static ssize_t nr_hugepages_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - return nr_hugepages_store_common(false, kobj, attr, buf, len); + return nr_hugepages_store_common(false, kobj, buf, len); } HSTATE_ATTR(nr_hugepages); @@ -1640,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj, static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - return nr_hugepages_store_common(true, kobj, attr, buf, len); + return nr_hugepages_store_common(true, kobj, buf, len); } HSTATE_ATTR(nr_hugepages_mempolicy); #endif @@ -1660,7 +1843,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, unsigned long input; struct hstate *h = kobj_to_hstate(kobj, NULL); - if (h->order >= MAX_ORDER) + if (hstate_is_gigantic(h)) return -EINVAL; err = kstrtoul(buf, 10, &input); @@ -1944,16 +2127,15 @@ static void __exit hugetlb_exit(void) } kobject_put(hugepages_kobj); + kfree(htlb_fault_mutex_table); } module_exit(hugetlb_exit); static int __init hugetlb_init(void) { - /* Some platform decide whether they support huge pages at boot - * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when - * there is no such support - */ - if (HPAGE_SHIFT == 0) + int i; + + if (!hugepages_supported()) return 0; if (!size_to_hstate(default_hstate_size)) { @@ -1973,6 +2155,17 @@ static int __init hugetlb_init(void) hugetlb_register_all_nodes(); hugetlb_cgroup_file_init(); +#ifdef CONFIG_SMP + num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus()); +#else + num_fault_mutexes = 1; +#endif + htlb_fault_mutex_table = + kmalloc(sizeof(struct mutex) * num_fault_mutexes, GFP_KERNEL); + BUG_ON(!htlb_fault_mutex_table); + + for (i = 0; i < num_fault_mutexes; i++) + mutex_init(&htlb_fault_mutex_table[i]); return 0; } module_init(hugetlb_init); @@ -2066,13 +2259,11 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; - unsigned long tmp; + unsigned long tmp = h->max_huge_pages; int ret; - tmp = h->max_huge_pages; - - if (write && h->order >= MAX_ORDER) - return -EINVAL; + if (!hugepages_supported()) + return -ENOTSUPP; table->data = &tmp; table->maxlen = sizeof(unsigned long); @@ -2080,19 +2271,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, if (ret) goto out; - if (write) { - NODEMASK_ALLOC(nodemask_t, nodes_allowed, - GFP_KERNEL | __GFP_NORETRY); - if (!(obey_mempolicy && - init_nodemask_of_mempolicy(nodes_allowed))) { - NODEMASK_FREE(nodes_allowed); - nodes_allowed = &node_states[N_MEMORY]; - } - h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed); - - if (nodes_allowed != &node_states[N_MEMORY]) - NODEMASK_FREE(nodes_allowed); - } + if (write) + ret = __nr_hugepages_store_common(obey_mempolicy, h, + NUMA_NO_NODE, tmp, *length); out: return ret; } @@ -2122,9 +2303,12 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, unsigned long tmp; int ret; + if (!hugepages_supported()) + return -ENOTSUPP; + tmp = h->nr_overcommit_huge_pages; - if (write && h->order >= MAX_ORDER) + if (write && hstate_is_gigantic(h)) return -EINVAL; table->data = &tmp; @@ -2147,6 +2331,8 @@ out: void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return; seq_printf(m, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" @@ -2163,6 +2349,8 @@ void hugetlb_report_meminfo(struct seq_file *m) int hugetlb_report_node_meminfo(int nid, char *buf) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return 0; return sprintf(buf, "Node %d HugePages_Total: %5u\n" "Node %d HugePages_Free: %5u\n" @@ -2177,6 +2365,9 @@ void hugetlb_show_meminfo(void) struct hstate *h; int nid; + if (!hugepages_supported()) + return; + for_each_node_state(nid, N_MEMORY) for_each_hstate(h) pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", @@ -2251,41 +2442,30 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) * after this open call completes. It is therefore safe to take a * new reference here without additional locking. */ - if (resv) + if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_get(&resv->refs); } -static void resv_map_put(struct vm_area_struct *vma) -{ - struct resv_map *resv = vma_resv_map(vma); - - if (!resv) - return; - kref_put(&resv->refs, resv_map_release); -} - static void hugetlb_vm_op_close(struct vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); struct resv_map *resv = vma_resv_map(vma); struct hugepage_subpool *spool = subpool_vma(vma); - unsigned long reserve; - unsigned long start; - unsigned long end; + unsigned long reserve, start, end; - if (resv) { - start = vma_hugecache_offset(h, vma, vma->vm_start); - end = vma_hugecache_offset(h, vma, vma->vm_end); + if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + return; - reserve = (end - start) - - region_count(&resv->regions, start, end); + start = vma_hugecache_offset(h, vma, vma->vm_start); + end = vma_hugecache_offset(h, vma, vma->vm_end); - resv_map_put(vma); + reserve = (end - start) - region_count(resv, start, end); - if (reserve) { - hugetlb_acct_memory(h, -reserve); - hugepage_subpool_put_pages(spool, reserve); - } + kref_put(&resv->refs, resv_map_release); + + if (reserve) { + hugetlb_acct_memory(h, -reserve); + hugepage_subpool_put_pages(spool, reserve); } } @@ -2336,6 +2516,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, update_mmu_cache(vma, address, ptep); } +static int is_hugetlb_entry_migration(pte_t pte) +{ + swp_entry_t swp; + + if (huge_pte_none(pte) || pte_present(pte)) + return 0; + swp = pte_to_swp_entry(pte); + if (non_swap_entry(swp) && is_migration_entry(swp)) + return 1; + else + return 0; +} + +static int is_hugetlb_entry_hwpoisoned(pte_t pte) +{ + swp_entry_t swp; + + if (huge_pte_none(pte) || pte_present(pte)) + return 0; + swp = pte_to_swp_entry(pte); + if (non_swap_entry(swp) && is_hwpoison_entry(swp)) + return 1; + else + return 0; +} int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) @@ -2375,7 +2580,24 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); - if (!huge_pte_none(huge_ptep_get(src_pte))) { + entry = huge_ptep_get(src_pte); + if (huge_pte_none(entry)) { /* skip none entry */ + ; + } else if (unlikely(is_hugetlb_entry_migration(entry) || + is_hugetlb_entry_hwpoisoned(entry))) { + swp_entry_t swp_entry = pte_to_swp_entry(entry); + + if (is_write_migration_entry(swp_entry) && cow) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&swp_entry); + entry = swp_entry_to_pte(swp_entry); + set_huge_pte_at(src, addr, src_pte, entry); + } + set_huge_pte_at(dst, addr, dst_pte, entry); + } else { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); entry = huge_ptep_get(src_pte); @@ -2394,32 +2616,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, return ret; } -static int is_hugetlb_entry_migration(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return 0; - swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_migration_entry(swp)) - return 1; - else - return 0; -} - -static int is_hugetlb_entry_hwpoisoned(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return 0; - swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_hwpoison_entry(swp)) - return 1; - else - return 0; -} - void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) @@ -2554,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, * from other VMAs and let the children be SIGKILLed if they are faulting the * same region. */ -static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, - struct page *page, unsigned long address) +static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, + struct page *page, unsigned long address) { struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; @@ -2594,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, address + huge_page_size(h), page); } mutex_unlock(&mapping->i_mmap_mutex); - - return 1; } /* @@ -2610,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, { struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; - int outside_reserve = 0; + int ret = 0, outside_reserve = 0; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -2640,14 +2834,14 @@ retry_avoidcopy: page_cache_get(old_page); - /* Drop page table lock as buddy allocator may be called */ + /* + * Drop page table lock as buddy allocator may be called. It will + * be acquired again before returning to the caller, as expected. + */ spin_unlock(ptl); new_page = alloc_huge_page(vma, address, outside_reserve); if (IS_ERR(new_page)) { - long err = PTR_ERR(new_page); - page_cache_release(old_page); - /* * If a process owning a MAP_PRIVATE mapping fails to COW, * it is due to references held by a child and an insufficient @@ -2656,28 +2850,25 @@ retry_avoidcopy: * may get SIGKILLed if it later faults. */ if (outside_reserve) { + page_cache_release(old_page); BUG_ON(huge_pte_none(pte)); - if (unmap_ref_private(mm, vma, old_page, address)) { - BUG_ON(huge_pte_none(pte)); - spin_lock(ptl); - ptep = huge_pte_offset(mm, address & huge_page_mask(h)); - if (likely(pte_same(huge_ptep_get(ptep), pte))) - goto retry_avoidcopy; - /* - * race occurs while re-acquiring page table - * lock, and our job is done. - */ - return 0; - } - WARN_ON_ONCE(1); + unmap_ref_private(mm, vma, old_page, address); + BUG_ON(huge_pte_none(pte)); + spin_lock(ptl); + ptep = huge_pte_offset(mm, address & huge_page_mask(h)); + if (likely(ptep && + pte_same(huge_ptep_get(ptep), pte))) + goto retry_avoidcopy; + /* + * race occurs while re-acquiring page table + * lock, and our job is done. + */ + return 0; } - /* Caller expects lock to be held */ - spin_lock(ptl); - if (err == -ENOMEM) - return VM_FAULT_OOM; - else - return VM_FAULT_SIGBUS; + ret = (PTR_ERR(new_page) == -ENOMEM) ? + VM_FAULT_OOM : VM_FAULT_SIGBUS; + goto out_release_old; } /* @@ -2685,11 +2876,8 @@ retry_avoidcopy: * anon_vma prepared. */ if (unlikely(anon_vma_prepare(vma))) { - page_cache_release(new_page); - page_cache_release(old_page); - /* Caller expects lock to be held */ - spin_lock(ptl); - return VM_FAULT_OOM; + ret = VM_FAULT_OOM; + goto out_release_all; } copy_user_huge_page(new_page, old_page, address, vma, @@ -2699,13 +2887,14 @@ retry_avoidcopy: mmun_start = address & huge_page_mask(h); mmun_end = mmun_start + huge_page_size(h); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); + /* * Retake the page table lock to check for racing updates * before the page tables are altered */ spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); - if (likely(pte_same(huge_ptep_get(ptep), pte))) { + if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) { ClearPagePrivate(new_page); /* Break COW */ @@ -2719,12 +2908,13 @@ retry_avoidcopy: } spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); +out_release_all: page_cache_release(new_page); +out_release_old: page_cache_release(old_page); - /* Caller expects lock to be held */ - spin_lock(ptl); - return 0; + spin_lock(ptl); /* Caller expects lock to be held */ + return ret; } /* Return the pagecache page at a given address within a VMA */ @@ -2761,15 +2951,14 @@ static bool hugetlbfs_pagecache_present(struct hstate *h, } static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, unsigned int flags) + struct address_space *mapping, pgoff_t idx, + unsigned long address, pte_t *ptep, unsigned int flags) { struct hstate *h = hstate_vma(vma); int ret = VM_FAULT_SIGBUS; int anon_rmap = 0; - pgoff_t idx; unsigned long size; struct page *page; - struct address_space *mapping; pte_t new_pte; spinlock_t *ptl; @@ -2784,9 +2973,6 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } - mapping = vma->vm_file->f_mapping; - idx = vma_hugecache_offset(h, vma, address); - /* * Use page lock to guard against racing truncation * before we get page_table_lock. @@ -2871,8 +3057,7 @@ retry: if (anon_rmap) { ClearPagePrivate(page); hugepage_add_new_anon_rmap(page, vma, address); - } - else + } else page_dup_rmap(page); new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); @@ -2896,17 +3081,53 @@ backout_unlocked: goto out; } +#ifdef CONFIG_SMP +static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm, + struct vm_area_struct *vma, + struct address_space *mapping, + pgoff_t idx, unsigned long address) +{ + unsigned long key[2]; + u32 hash; + + if (vma->vm_flags & VM_SHARED) { + key[0] = (unsigned long) mapping; + key[1] = idx; + } else { + key[0] = (unsigned long) mm; + key[1] = address >> huge_page_shift(h); + } + + hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0); + + return hash & (num_fault_mutexes - 1); +} +#else +/* + * For uniprocesor systems we always use a single mutex, so just + * return 0 and avoid the hashing overhead. + */ +static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm, + struct vm_area_struct *vma, + struct address_space *mapping, + pgoff_t idx, unsigned long address) +{ + return 0; +} +#endif + int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { - pte_t *ptep; - pte_t entry; + pte_t *ptep, entry; spinlock_t *ptl; int ret; + u32 hash; + pgoff_t idx; struct page *page = NULL; struct page *pagecache_page = NULL; - static DEFINE_MUTEX(hugetlb_instantiation_mutex); struct hstate *h = hstate_vma(vma); + struct address_space *mapping; address &= huge_page_mask(h); @@ -2925,15 +3146,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!ptep) return VM_FAULT_OOM; + mapping = vma->vm_file->f_mapping; + idx = vma_hugecache_offset(h, vma, address); + /* * Serialize hugepage allocation and instantiation, so that we don't * get spurious allocation failures if two CPUs race to instantiate * the same page in the page cache. */ - mutex_lock(&hugetlb_instantiation_mutex); + hash = fault_mutex_hash(h, mm, vma, mapping, idx, address); + mutex_lock(&htlb_fault_mutex_table[hash]); + entry = huge_ptep_get(ptep); if (huge_pte_none(entry)) { - ret = hugetlb_no_page(mm, vma, address, ptep, flags); + ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags); goto out_mutex; } @@ -3002,8 +3228,7 @@ out_ptl: put_page(page); out_mutex: - mutex_unlock(&hugetlb_instantiation_mutex); - + mutex_unlock(&htlb_fault_mutex_table[hash]); return ret; } @@ -3120,6 +3345,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); + mmu_notifier_invalidate_range_start(mm, start, end); mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; @@ -3149,6 +3375,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, */ flush_tlb_range(vma, start, end); mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + mmu_notifier_invalidate_range_end(mm, start, end); return pages << h->order; } @@ -3161,6 +3388,7 @@ int hugetlb_reserve_pages(struct inode *inode, long ret, chg; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); + struct resv_map *resv_map; /* * Only apply hugepage reservation if asked. At fault time, an @@ -3176,10 +3404,13 @@ int hugetlb_reserve_pages(struct inode *inode, * to reserve the full area even if read-only as mprotect() may be * called to make the mapping read-write. Assume !vma is a shm mapping */ - if (!vma || vma->vm_flags & VM_MAYSHARE) - chg = region_chg(&inode->i_mapping->private_list, from, to); - else { - struct resv_map *resv_map = resv_map_alloc(); + if (!vma || vma->vm_flags & VM_MAYSHARE) { + resv_map = inode_resv_map(inode); + + chg = region_chg(resv_map, from, to); + + } else { + resv_map = resv_map_alloc(); if (!resv_map) return -ENOMEM; @@ -3222,20 +3453,23 @@ int hugetlb_reserve_pages(struct inode *inode, * else has to be done for private mappings here */ if (!vma || vma->vm_flags & VM_MAYSHARE) - region_add(&inode->i_mapping->private_list, from, to); + region_add(resv_map, from, to); return 0; out_err: - if (vma) - resv_map_put(vma); + if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + kref_put(&resv_map->refs, resv_map_release); return ret; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) { struct hstate *h = hstate_inode(inode); - long chg = region_truncate(&inode->i_mapping->private_list, offset); + struct resv_map *resv_map = inode_resv_map(inode); + long chg = 0; struct hugepage_subpool *spool = subpool_inode(inode); + if (resv_map) + chg = region_truncate(resv_map, offset); spin_lock(&inode->i_lock); inode->i_blocks -= (blocks_per_huge_page(h) * freed); spin_unlock(&inode->i_lock); @@ -3446,7 +3680,7 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, #else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ /* Can be overriden by architectures */ -__attribute__((weak)) struct page * +struct page * __weak follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int write) { |