Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c        |  51
-rw-r--r--   mm/highmem.c        |   2
-rw-r--r--   mm/mempolicy.c      |   2
-rw-r--r--   mm/mempool.c        |  35
-rw-r--r--   mm/mmap.c           |  34
-rw-r--r--   mm/mremap.c         |   6
-rw-r--r--   mm/page-writeback.c |   6
-rw-r--r--   mm/page_alloc.c     |  38
-rw-r--r--   mm/rmap.c           | 113
-rw-r--r--   mm/slab.c           |  47
-rw-r--r--   mm/swap_state.c     |  27
-rw-r--r--   mm/truncate.c       |   4
-rw-r--r--   mm/vmalloc.c        |   8
13 files changed, 205 insertions(+), 168 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 93595c327bb..47263ac3e4e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -29,11 +29,6 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 /*
- * This is needed for the following functions:
- *  - try_to_release_page
- *  - block_invalidatepage
- *  - generic_osync_inode
- *
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
 #include <linux/buffer_head.h> /* for generic_osync_inode */
@@ -123,8 +118,7 @@ void remove_from_page_cache(struct page *page)
 {
         struct address_space *mapping = page->mapping;
 
-        if (unlikely(!PageLocked(page)))
-                PAGE_BUG(page);
+        BUG_ON(!PageLocked(page));
 
         write_lock_irq(&mapping->tree_lock);
         __remove_from_page_cache(page);
@@ -139,7 +133,25 @@ static int sync_page(void *word)
         page = container_of((page_flags_t *)word, struct page, flags);
 
         /*
-         * FIXME, fercrissake.  What is this barrier here for?
+         * page_mapping() is being called without PG_locked held.
+         * Some knowledge of the state and use of the page is used to
+         * reduce the requirements down to a memory barrier.
+         * The danger here is of a stale page_mapping() return value
+         * indicating a struct address_space different from the one it's
+         * associated with when it is associated with one.
+         * After smp_mb(), it's either the correct page_mapping() for
+         * the page, or an old page_mapping() and the page's own
+         * page_mapping() has gone NULL.
+         * The ->sync_page() address_space operation must tolerate
+         * page_mapping() going NULL. By an amazing coincidence,
+         * this comes about because none of the users of the page
+         * in the ->sync_page() methods make essential use of the
+         * page_mapping(), merely passing the page down to the backing
+         * device's unplug functions when it's non-NULL, which in turn
+         * ignore it for all cases but swap, where only page->private is
+         * of interest. When page_mapping() does go NULL, the entire
+         * call stack gracefully ignores the page and returns.
+         * -- wli
          */
         smp_mb();
         mapping = page_mapping(page);
@@ -152,9 +164,10 @@ static int sync_page(void *word)
 /**
  * filemap_fdatawrite_range - start writeback against all of a mapping's
  * dirty pages that lie within the byte offsets <start, end>
- * @mapping: address space structure to write
- * @start: offset in bytes where the range starts
- * @end : offset in bytes where the range ends
+ * @mapping:   address space structure to write
+ * @start:     offset in bytes where the range starts
+ * @end:       offset in bytes where the range ends
+ * @sync_mode: enable synchronous operation
  *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
  * opposed to a regular memory cleansing writeback.  The difference between
@@ -518,8 +531,8 @@ EXPORT_SYMBOL(find_trylock_page);
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  *
- * @mapping - the address_space to search
- * @offset - the page index
+ * @mapping: the address_space to search
+ * @offset: the page index
  *
  * Locates the desired pagecache page, locks it, increments its reference
  * count and returns its address.
@@ -558,9 +571,9 @@ EXPORT_SYMBOL(find_lock_page);
 /**
  * find_or_create_page - locate or add a pagecache page
  *
- * @mapping - the page's address_space
- * @index - the page's index into the mapping
- * @gfp_mask - page allocation mode
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
  *
  * Locates a page in the pagecache.  If the page is not present, a new page
  * is allocated using @gfp_mask and is added to the pagecache and to the VM's
@@ -1949,7 +1962,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                         buf = iov->iov_base + written;
                 else {
                         filemap_set_next_iovec(&cur_iov, &iov_base, written);
-                        buf = iov->iov_base + iov_base;
+                        buf = cur_iov->iov_base + iov_base;
                 }
 
         do {
@@ -2007,9 +2020,11 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                         count -= status;
                         pos += status;
                         buf += status;
-                        if (unlikely(nr_segs > 1))
+                        if (unlikely(nr_segs > 1)) {
                                 filemap_set_next_iovec(&cur_iov,
                                                 &iov_base, status);
+                                buf = cur_iov->iov_base + iov_base;
+                        }
                 }
         }
         if (unlikely(copied != bytes))
diff --git a/mm/highmem.c b/mm/highmem.c
index d01276506b0..40091159946 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -325,6 +325,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
                         continue;
 
                 mempool_free(bvec->bv_page, pool);
+                dec_page_state(nr_bounce);
         }
 
         bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -405,6 +406,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
                 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
                 to->bv_len = from->bv_len;
                 to->bv_offset = from->bv_offset;
+                inc_page_state(nr_bounce);
 
                 if (rw == WRITE) {
                         char *vto, *vfrom;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a3b44a671ce..08c41da429c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -661,7 +661,7 @@ static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempol
         case MPOL_BIND:
                 /* Lower zones don't get a policy applied */
                 /* Careful: current->mems_allowed might have moved */
-                if (gfp >= policy_zone)
+                if ((gfp & GFP_ZONEMASK) >= policy_zone)
                         if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
                                 return policy->v.zonelist;
                 /*FALL THROUGH*/
diff --git a/mm/mempool.c b/mm/mempool.c
index b014ffeaa41..c9f3d462042 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -198,31 +198,22 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
         void *element;
         unsigned long flags;
         DEFINE_WAIT(wait);
-        int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+        int gfp_temp;
 
         might_sleep_if(gfp_mask & __GFP_WAIT);
+
+        gfp_mask |= __GFP_NOMEMALLOC;   /* don't allocate emergency reserves */
+        gfp_mask |= __GFP_NORETRY;      /* don't loop in __alloc_pages */
+        gfp_mask |= __GFP_NOWARN;       /* failures are OK */
+
+        gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
+
 repeat_alloc:
-        element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data);
+
+        element = pool->alloc(gfp_temp, pool->pool_data);
         if (likely(element != NULL))
                 return element;
 
-        /*
-         * If the pool is less than 50% full and we can perform effective
-         * page reclaim then try harder to allocate an element.
-         */
-        mb();
-        if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) &&
-                                (pool->curr_nr <= pool->min_nr/2)) {
-                element = pool->alloc(gfp_mask, pool->pool_data);
-                if (likely(element != NULL))
-                        return element;
-        }
-
-        /*
-         * Kick the VM at this point.
-         */
-        wakeup_bdflush(0);
-
         spin_lock_irqsave(&pool->lock, flags);
         if (likely(pool->curr_nr)) {
                 element = remove_element(pool);
@@ -235,8 +226,10 @@ repeat_alloc:
         if (!(gfp_mask & __GFP_WAIT))
                 return NULL;
 
+        /* Now start performing page reclaim */
+        gfp_temp = gfp_mask;
         prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
-        mb();
+        smp_mb();
         if (!pool->curr_nr)
                 io_schedule();
         finish_wait(&pool->wait, &wait);
@@ -257,7 +250,7 @@ void mempool_free(void *element, mempool_t *pool)
 {
         unsigned long flags;
 
-        mb();
+        smp_mb();
         if (pool->curr_nr < pool->min_nr) {
                 spin_lock_irqsave(&pool->lock, flags);
                 if (pool->curr_nr < pool->min_nr) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 6ea204cc751..01f9793591f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -937,9 +937,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
         /* mlock MCL_FUTURE? */
         if (vm_flags & VM_LOCKED) {
                 unsigned long locked, lock_limit;
-                locked = mm->locked_vm << PAGE_SHIFT;
+                locked = len >> PAGE_SHIFT;
+                locked += mm->locked_vm;
                 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-                locked += len;
+                lock_limit >>= PAGE_SHIFT;
                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                         return -EAGAIN;
         }
@@ -1009,8 +1010,7 @@ munmap_back:
         }
 
         /* Check against address space limit. */
-        if ((mm->total_vm << PAGE_SHIFT) + len
-            > current->signal->rlim[RLIMIT_AS].rlim_cur)
+        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                 return -ENOMEM;
 
         if (accountable && (!(flags & MAP_NORESERVE) ||
@@ -1421,7 +1421,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
         struct rlimit *rlim = current->signal->rlim;
 
         /* address space limit tests */
-        if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+        if (!may_expand_vm(mm, grow))
                 return -ENOMEM;
 
         /* Stack limit test */
@@ -1823,9 +1823,10 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
          */
         if (mm->def_flags & VM_LOCKED) {
                 unsigned long locked, lock_limit;
-                locked = mm->locked_vm << PAGE_SHIFT;
+                locked = len >> PAGE_SHIFT;
+                locked += mm->locked_vm;
                 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-                locked += len;
+                lock_limit >>= PAGE_SHIFT;
                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                         return -EAGAIN;
         }
@@ -1848,8 +1849,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
         }
 
         /* Check against address space limits *after* clearing old maps... */
-        if ((mm->total_vm << PAGE_SHIFT) + len
-            > current->signal->rlim[RLIMIT_AS].rlim_cur)
+        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                 return -ENOMEM;
 
         if (mm->map_count > sysctl_max_map_count)
@@ -2019,3 +2019,19 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         }
         return new_vma;
 }
+
+/*
+ * Return true if the calling process may expand its vm space by the passed
+ * number of pages
+ */
+int may_expand_vm(struct mm_struct *mm, unsigned long npages)
+{
+        unsigned long cur = mm->total_vm;       /* pages */
+        unsigned long lim;
+
+        lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+
+        if (cur + npages > lim)
+                return 0;
+        return 1;
+}
diff --git a/mm/mremap.c b/mm/mremap.c
index 0d1c1b9c7a0..0dd7ace94e5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -347,10 +347,10 @@ unsigned long do_mremap(unsigned long addr,
                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                         goto out;
         }
-        ret = -ENOMEM;
-        if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
-            > current->signal->rlim[RLIMIT_AS].rlim_cur)
+        if (!may_expand_vm(current->mm, (new_len - old_len) >> PAGE_SHIFT)) {
+                ret = -ENOMEM;
                 goto out;
+        }
 
         if (vma->vm_flags & VM_ACCOUNT) {
                 charged = (new_len - old_len) >> PAGE_SHIFT;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6ddd6a29c73..613b99a5591 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -255,7 +255,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 
 /**
  * balance_dirty_pages_ratelimited - balance dirty memory state
- * @mapping - address_space which was dirtied
+ * @mapping: address_space which was dirtied
  *
  * Processes which are dirtying memory should call in here once for each page
  * which was newly dirtied.  The function will periodically check the system's
@@ -562,8 +562,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 /**
  * write_one_page - write out a single page and optionally wait on I/O
  *
- * @page - the page to write
- * @wait - if true, wait on writeout
+ * @page: the page to write
+ * @wait: if true, wait on writeout
  *
  * The page must be locked by the caller and will be unlocked upon return.
 *
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c73dbbc1cd8..b1061b1962f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -43,7 +43,9 @@
  * initializer cleaner
  */
 nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
 struct pglist_data *pgdat_list;
 unsigned long totalram_pages;
 unsigned long totalhigh_pages;
@@ -799,14 +801,18 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
         }
 
         /* This allocation should allow future memory freeing. */
-        if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
-                /* go through the zonelist yet again, ignoring mins */
-                for (i = 0; (z = zones[i]) != NULL; i++) {
-                        if (!cpuset_zone_allowed(z))
-                                continue;
-                        page = buffered_rmqueue(z, order, gfp_mask);
-                        if (page)
-                                goto got_pg;
+
+        if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+                        && !in_interrupt()) {
+                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+                        /* go through the zonelist yet again, ignoring mins */
+                        for (i = 0; (z = zones[i]) != NULL; i++) {
+                                if (!cpuset_zone_allowed(z))
+                                        continue;
+                                page = buffered_rmqueue(z, order, gfp_mask);
+                                if (page)
+                                        goto got_pg;
+                        }
                 }
                 goto nopage;
         }
@@ -1351,8 +1357,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
 /**
- * find_next_best_node - find the next node that should appear in a given
- * node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
  * @used_node_mask: nodemask_t of already used nodes
  *
@@ -1671,6 +1676,18 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                 if (batch < 1)
                         batch = 1;
 
+                /*
+                 * Clamp the batch to a 2^n - 1 value. Having a power
+                 * of 2 value was found to be more likely to have
+                 * suboptimal cache aliasing properties in some cases.
+                 *
+                 * For example if 2 tasks are alternately allocating
+                 * batches of pages, one task can end up with a lot
+                 * of pages of one half of the possible page colors
+                 * and the other with pages of the other colors.
+                 */
+                batch = (1 << fls(batch + batch/2)) - 1;
+
                 for (cpu = 0; cpu < NR_CPUS; cpu++) {
                         struct per_cpu_pages *pcp;
 
@@ -1881,6 +1898,7 @@ static char *vmstat_text[] = {
         "allocstall",
 
         "pgrotated",
+        "nr_bounce",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
diff --git a/mm/rmap.c b/mm/rmap.c
index 884d6d1928b..378de234c12 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -243,6 +243,42 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
+ * Check that @page is mapped at @address into @mm.
+ *
+ * On success returns with mapped pte and locked mm->page_table_lock.
+ */
+static pte_t *page_check_address(struct page *page, struct mm_struct *mm,
+                                        unsigned long address)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+
+        /*
+         * We need the page_table_lock to protect us from page faults,
+         * munmap, fork, etc...
+         */
+        spin_lock(&mm->page_table_lock);
+        pgd = pgd_offset(mm, address);
+        if (likely(pgd_present(*pgd))) {
+                pud = pud_offset(pgd, address);
+                if (likely(pud_present(*pud))) {
+                        pmd = pmd_offset(pud, address);
+                        if (likely(pmd_present(*pmd))) {
+                                pte = pte_offset_map(pmd, address);
+                                if (likely(pte_present(*pte) &&
+                                    page_to_pfn(page) == pte_pfn(*pte)))
+                                        return pte;
+                                pte_unmap(pte);
+                        }
+                }
+        }
+        spin_unlock(&mm->page_table_lock);
+        return ERR_PTR(-ENOENT);
+}
+
+/*
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
@@ -251,9 +287,6 @@ static int page_referenced_one(struct page *page,
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
         pte_t *pte;
         int referenced = 0;
 
@@ -263,39 +296,18 @@ static int page_referenced_one(struct page *page,
         if (address == -EFAULT)
                 goto out;
 
-        spin_lock(&mm->page_table_lock);
-
-        pgd = pgd_offset(mm, address);
-        if (!pgd_present(*pgd))
-                goto out_unlock;
-
-        pud = pud_offset(pgd, address);
-        if (!pud_present(*pud))
-                goto out_unlock;
-
-        pmd = pmd_offset(pud, address);
-        if (!pmd_present(*pmd))
-                goto out_unlock;
-
-        pte = pte_offset_map(pmd, address);
-        if (!pte_present(*pte))
-                goto out_unmap;
-
-        if (page_to_pfn(page) != pte_pfn(*pte))
-                goto out_unmap;
-
-        if (ptep_clear_flush_young(vma, address, pte))
-                referenced++;
-
-        if (mm != current->mm && !ignore_token && has_swap_token(mm))
-                referenced++;
+        pte = page_check_address(page, mm, address);
+        if (!IS_ERR(pte)) {
+                if (ptep_clear_flush_young(vma, address, pte))
+                        referenced++;
 
-        (*mapcount)--;
+                if (mm != current->mm && !ignore_token && has_swap_token(mm))
+                        referenced++;
 
-out_unmap:
-        pte_unmap(pte);
-out_unlock:
-        spin_unlock(&mm->page_table_lock);
+                (*mapcount)--;
+                pte_unmap(pte);
+                spin_unlock(&mm->page_table_lock);
+        }
 out:
         return referenced;
 }
@@ -502,9 +514,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
         pte_t *pte;
         pte_t pteval;
         int ret = SWAP_AGAIN;
@@ -515,30 +524,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
         if (address == -EFAULT)
                 goto out;
 
-        /*
-         * We need the page_table_lock to protect us from page faults,
-         * munmap, fork, etc...
-         */
-        spin_lock(&mm->page_table_lock);
-
-        pgd = pgd_offset(mm, address);
-        if (!pgd_present(*pgd))
-                goto out_unlock;
-
-        pud = pud_offset(pgd, address);
-        if (!pud_present(*pud))
-                goto out_unlock;
-
-        pmd = pmd_offset(pud, address);
-        if (!pmd_present(*pmd))
-                goto out_unlock;
-
-        pte = pte_offset_map(pmd, address);
-        if (!pte_present(*pte))
-                goto out_unmap;
-
-        if (page_to_pfn(page) != pte_pfn(*pte))
-                goto out_unmap;
+        pte = page_check_address(page, mm, address);
+        if (IS_ERR(pte))
+                goto out;
 
         /*
          * If the page is mlock()d, we cannot swap it out.
@@ -604,7 +592,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 
 out_unmap:
         pte_unmap(pte);
-out_unlock:
         spin_unlock(&mm->page_table_lock);
 out:
         return ret;
@@ -708,7 +695,6 @@ static void try_to_unmap_cluster(unsigned long cursor,
         }
 
         pte_unmap(pte);
-
 out_unlock:
         spin_unlock(&mm->page_table_lock);
 }
@@ -860,3 +846,4 @@ int try_to_unmap(struct page *page)
                 ret = SWAP_SUCCESS;
         return ret;
 }
+
diff --git a/mm/slab.c b/mm/slab.c
index ec660d85ddd..84074264115 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -583,7 +583,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
         return cachep->array[smp_processor_id()];
 }
 
-static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
 {
         struct cache_sizes *csizep = malloc_sizes;
 
@@ -607,6 +607,12 @@ static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
         return csizep->cs_cachep;
 }
 
+kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+{
+        return __find_general_cachep(size, gfpflags);
+}
+EXPORT_SYMBOL(kmem_find_general_cachep);
+
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
                 int flags, size_t *left_over, unsigned int *num)
@@ -672,14 +678,11 @@ static struct array_cache *alloc_arraycache(int cpu, int entries,
         int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
         struct array_cache *nc = NULL;
 
-        if (cpu != -1) {
-                kmem_cache_t *cachep;
-                cachep = kmem_find_general_cachep(memsize, GFP_KERNEL);
-                if (cachep)
-                        nc = kmem_cache_alloc_node(cachep, cpu_to_node(cpu));
-        }
-        if (!nc)
+        if (cpu == -1)
                 nc = kmalloc(memsize, GFP_KERNEL);
+        else
+                nc = kmalloc_node(memsize, GFP_KERNEL, cpu_to_node(cpu));
+
         if (nc) {
                 nc->avail = 0;
                 nc->limit = entries;
@@ -1663,7 +1666,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
         }
 
         if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
-                synchronize_kernel();
+                synchronize_rcu();
 
         /* no cpu_online check required here since we clear the percpu
          * array on cpu offline and set this to NULL.
@@ -2361,7 +2364,7 @@ out:
  * and can sleep. And it will allocate memory on the given node, which
  * can improve the performance for cpu bound structures.
  */
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid)
 {
         int loop;
         void *objp;
@@ -2393,7 +2396,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
 
         spin_unlock_irq(&cachep->spinlock);
         local_irq_disable();
-        if (!cache_grow(cachep, GFP_KERNEL, nodeid)) {
+        if (!cache_grow(cachep, flags, nodeid)) {
                 local_irq_enable();
                 return NULL;
         }
@@ -2435,6 +2438,16 @@ got_slabp:
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
+void *kmalloc_node(size_t size, int flags, int node)
+{
+        kmem_cache_t *cachep;
+
+        cachep = kmem_find_general_cachep(size, flags);
+        if (unlikely(cachep == NULL))
+                return NULL;
+        return kmem_cache_alloc_node(cachep, flags, node);
+}
+EXPORT_SYMBOL(kmalloc_node);
 #endif
 
 /**
@@ -2462,7 +2475,12 @@ void *__kmalloc(size_t size, unsigned int __nocast flags)
 {
         kmem_cache_t *cachep;
 
-        cachep = kmem_find_general_cachep(size, flags);
+        /* If you want to save a few bytes .text space: replace
+         * __ with kmem_.
+         * Then kmalloc uses the uninlined functions instead of the inline
+         * functions.
+         */
+        cachep = __find_general_cachep(size, flags);
         if (unlikely(cachep == NULL))
                 return NULL;
         return __cache_alloc(cachep, flags);
@@ -2489,9 +2507,8 @@ void *__alloc_percpu(size_t size, size_t align)
         for (i = 0; i < NR_CPUS; i++) {
                 if (!cpu_possible(i))
                         continue;
-                pdata->ptrs[i] = kmem_cache_alloc_node(
-                                        kmem_find_general_cachep(size, GFP_KERNEL),
-                                        cpu_to_node(i));
+                pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL,
+                                                cpu_to_node(i));
 
                 if (!pdata->ptrs[i])
                         goto unwind_oom;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index a063a902ed0..4f251775ef9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -143,7 +143,6 @@ void __delete_from_swap_cache(struct page *page)
 int add_to_swap(struct page * page)
 {
         swp_entry_t entry;
-        int pf_flags;
         int err;
 
         if (!PageLocked(page))
@@ -154,29 +153,19 @@ int add_to_swap(struct page * page)
                 if (!entry.val)
                         return 0;
 
-                /* Radix-tree node allocations are performing
-                 * GFP_ATOMIC allocations under PF_MEMALLOC.
-                 * They can completely exhaust the page allocator.
-                 *
-                 * So PF_MEMALLOC is dropped here. This causes the slab
-                 * allocations to fail earlier, so radix-tree nodes will
-                 * then be allocated from the mempool reserves.
+                /*
+                 * Radix-tree node allocations from PF_MEMALLOC contexts could
+                 * completely exhaust the page allocator.  __GFP_NOMEMALLOC
+                 * stops emergency reserves from being allocated.
                  *
-                 * We're still using __GFP_HIGH for radix-tree node
-                 * allocations, so some of the emergency pools are available,
-                 * just not all of them.
+                 * TODO: this could cause a theoretical memory reclaim
+                 * deadlock in the swap out path.
                  */
-
-                pf_flags = current->flags;
-                current->flags &= ~PF_MEMALLOC;
-
                 /*
                  * Add it to the swap cache and mark it dirty
                  */
-                err = __add_to_swap_cache(page, entry, GFP_ATOMIC|__GFP_NOWARN);
-
-                if (pf_flags & PF_MEMALLOC)
-                        current->flags |= PF_MEMALLOC;
+                err = __add_to_swap_cache(page, entry,
+                                GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
                 switch (err) {
                 case 0:                         /* Success */
diff --git a/mm/truncate.c b/mm/truncate.c
index c9a63f0b69a..60c8764bfac 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -242,7 +242,7 @@ EXPORT_SYMBOL(invalidate_inode_pages);
 
 /**
  * invalidate_inode_pages2_range - remove range of pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
  * @start: the page offset 'from' which to invalidate
  * @end: the page offset 'to' which to invalidate (inclusive)
  *
@@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
 
 /**
  * invalidate_inode_pages2 - remove all pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
  *
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c6182f6f130..2bd83e5c2bb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -475,6 +475,10 @@ void *vmalloc(unsigned long size)
 
 EXPORT_SYMBOL(vmalloc);
 
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
 /**
  * vmalloc_exec - allocate virtually contiguous, executable memory
  *
@@ -488,10 +492,6 @@ EXPORT_SYMBOL(vmalloc);
  * use __vmalloc() instead.
  */
 
-#ifndef PAGE_KERNEL_EXEC
-# define PAGE_KERNEL_EXEC PAGE_KERNEL
-#endif
-
 void *vmalloc_exec(unsigned long size)
 {
         return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
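
For reference, the page_alloc.c hunk above clamps the per-CPU batch size to a 2^n - 1 value with "batch = (1 << fls(batch + batch/2)) - 1". A minimal userspace C sketch of that arithmetic is shown below; fls_equiv() is a stand-in for the kernel's fls() (position of the highest set bit, 1-based), and the loop bounds are only illustrative.

#include <stdio.h>

/* Stand-in for the kernel's fls(): position of the highest set bit, 1-based. */
static int fls_equiv(unsigned int x)
{
        int bit = 0;

        while (x) {
                bit++;
                x >>= 1;
        }
        return bit;
}

int main(void)
{
        unsigned int batch;

        /* Same expression as the patch: clamp batch to a 2^n - 1 value. */
        for (batch = 1; batch <= 64; batch *= 2) {
                unsigned int clamped = (1u << fls_equiv(batch + batch / 2)) - 1;

                printf("batch %2u -> %3u\n", batch, clamped);
        }
        return 0;
}

For power-of-two inputs this produces 1, 3, 7, 15, 31, 63, 127, so two tasks alternately draining per-CPU batches no longer split the available page colors along a power-of-two boundary, which is the cache-aliasing problem the comment in the hunk describes.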