Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c         |  51
-rw-r--r--  mm/highmem.c         |   2
-rw-r--r--  mm/mempolicy.c       |   2
-rw-r--r--  mm/mempool.c         |  35
-rw-r--r--  mm/mmap.c            |  34
-rw-r--r--  mm/mremap.c          |   6
-rw-r--r--  mm/page-writeback.c  |   6
-rw-r--r--  mm/page_alloc.c      |  38
-rw-r--r--  mm/rmap.c            | 113
-rw-r--r--  mm/slab.c            |  47
-rw-r--r--  mm/swap_state.c      |  27
-rw-r--r--  mm/truncate.c        |   4
-rw-r--r--  mm/vmalloc.c         |   8
13 files changed, 205 insertions, 168 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 93595c327bb..47263ac3e4e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -29,11 +29,6 @@
#include <linux/security.h>
#include <linux/syscalls.h>
/*
- * This is needed for the following functions:
- * - try_to_release_page
- * - block_invalidatepage
- * - generic_osync_inode
- *
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
#include <linux/buffer_head.h> /* for generic_osync_inode */
@@ -123,8 +118,7 @@ void remove_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
- if (unlikely(!PageLocked(page)))
- PAGE_BUG(page);
+ BUG_ON(!PageLocked(page));
write_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
@@ -139,7 +133,25 @@ static int sync_page(void *word)
page = container_of((page_flags_t *)word, struct page, flags);
/*
- * FIXME, fercrissake. What is this barrier here for?
+ * page_mapping() is being called without PG_locked held.
+ * Some knowledge of the state and use of the page is used to
+ * reduce the requirements down to a memory barrier.
+ * The danger here is of a stale page_mapping() return value
+ * indicating a struct address_space different from the one it's
+ * associated with when it is associated with one.
+ * After smp_mb(), it's either the correct page_mapping() for
+ * the page, or an old page_mapping() and the page's own
+ * page_mapping() has gone NULL.
+ * The ->sync_page() address_space operation must tolerate
+ * page_mapping() going NULL. By an amazing coincidence,
+ * this comes about because none of the users of the page
+ * in the ->sync_page() methods make essential use of the
+ * page_mapping(), merely passing the page down to the backing
+ * device's unplug functions when it's non-NULL, which in turn
+ * ignore it for all cases but swap, where only page->private is
+ * of interest. When page_mapping() does go NULL, the entire
+ * call stack gracefully ignores the page and returns.
+ * -- wli
*/
smp_mb();
mapping = page_mapping(page);
@@ -152,9 +164,10 @@ static int sync_page(void *word)
/**
* filemap_fdatawrite_range - start writeback against all of a mapping's
* dirty pages that lie within the byte offsets <start, end>
- * @mapping: address space structure to write
- * @start: offset in bytes where the range starts
- * @end : offset in bytes where the range ends
+ * @mapping: address space structure to write
+ * @start: offset in bytes where the range starts
+ * @end: offset in bytes where the range ends
+ * @sync_mode: enable synchronous operation
*
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
* opposed to a regular memory * cleansing writeback. The difference between
@@ -518,8 +531,8 @@ EXPORT_SYMBOL(find_trylock_page);
/**
* find_lock_page - locate, pin and lock a pagecache page
*
- * @mapping - the address_space to search
- * @offset - the page index
+ * @mapping: the address_space to search
+ * @offset: the page index
*
* Locates the desired pagecache page, locks it, increments its reference
* count and returns its address.
@@ -558,9 +571,9 @@ EXPORT_SYMBOL(find_lock_page);
/**
* find_or_create_page - locate or add a pagecache page
*
- * @mapping - the page's address_space
- * @index - the page's index into the mapping
- * @gfp_mask - page allocation mode
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
*
* Locates a page in the pagecache. If the page is not present, a new page
* is allocated using @gfp_mask and is added to the pagecache and to the VM's
@@ -1949,7 +1962,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
buf = iov->iov_base + written;
else {
filemap_set_next_iovec(&cur_iov, &iov_base, written);
- buf = iov->iov_base + iov_base;
+ buf = cur_iov->iov_base + iov_base;
}
do {
@@ -2007,9 +2020,11 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
count -= status;
pos += status;
buf += status;
- if (unlikely(nr_segs > 1))
+ if (unlikely(nr_segs > 1)) {
filemap_set_next_iovec(&cur_iov,
&iov_base, status);
+ buf = cur_iov->iov_base + iov_base;
+ }
}
}
if (unlikely(copied != bytes))
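[Editor's note] The two filemap.c hunks fix the same bookkeeping mistake in generic_file_buffered_write(): once the iovec cursor has advanced past the first segment, the source pointer must be rebuilt from the *current* segment base plus the intra-segment offset, not from the original iov. A minimal userspace sketch of that cursor logic, assuming a toy advance_iovec() helper and an arbitrary 4-byte copy size (neither is kernel code):

	#include <stdio.h>
	#include <string.h>
	#include <sys/uio.h>

	/* Advance the cursor by 'bytes' consumed, possibly crossing segments. */
	static void advance_iovec(const struct iovec **cur, size_t *base, size_t bytes)
	{
		while (bytes) {
			size_t copy = (*cur)->iov_len - *base;
			if (copy > bytes)
				copy = bytes;
			bytes -= copy;
			*base += copy;
			if (*base == (*cur)->iov_len) {	/* segment exhausted, move on */
				(*cur)++;
				*base = 0;
			}
		}
	}

	int main(void)
	{
		char a[] = "hello ", b[] = "world!";
		struct iovec iov[2] = {
			{ .iov_base = a, .iov_len = 6 },
			{ .iov_base = b, .iov_len = 6 },
		};
		const struct iovec *cur_iov = iov;
		size_t iov_base = 0, copied, total = 12;
		char out[16], *dst = out;

		while (total) {
			/* Rebuild the source pointer from the *current* segment. */
			const char *buf = (const char *)cur_iov->iov_base + iov_base;

			copied = total < 4 ? total : 4;		/* pretend 4 bytes copy per pass */
			if (copied > cur_iov->iov_len - iov_base)
				copied = cur_iov->iov_len - iov_base;
			memcpy(dst, buf, copied);
			dst += copied;
			total -= copied;
			advance_iovec(&cur_iov, &iov_base, copied);
		}
		*dst = '\0';
		printf("%s\n", out);	/* prints "hello world!" */
		return 0;
	}

Using the stale first-segment base (the bug the patch removes) would re-copy "hello " bytes once the cursor moves to the second segment.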
diff --git a/mm/highmem.c b/mm/highmem.c
index d01276506b0..40091159946 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -325,6 +325,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
continue;
mempool_free(bvec->bv_page, pool);
+ dec_page_state(nr_bounce);
}
bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -405,6 +406,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
+ inc_page_state(nr_bounce);
if (rw == WRITE) {
char *vto, *vfrom;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a3b44a671ce..08c41da429c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -661,7 +661,7 @@ static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempol
case MPOL_BIND:
/* Lower zones don't get a policy applied */
/* Careful: current->mems_allowed might have moved */
- if (gfp >= policy_zone)
+ if ((gfp & GFP_ZONEMASK) >= policy_zone)
if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
return policy->v.zonelist;
/*FALL THROUGH*/
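[Editor's note] The mempolicy.c fix matters because a gfp mask carries both zone-modifier bits (low bits) and behaviour flags (higher bits); comparing the whole mask against policy_zone is almost always true. A toy comparison of the old and new tests, using made-up flag values rather than the real gfp.h constants:

	#include <stdio.h>

	/* Illustrative values only -- not the real include/linux/gfp.h constants. */
	#define GFP_ZONEMASK	0x0fu
	#define __GFP_WAIT	0x10u
	#define __GFP_IO	0x40u

	int main(void)
	{
		unsigned int gfp = __GFP_WAIT | __GFP_IO;	/* zone modifier bits are 0 */
		unsigned int policy_zone = 1;			/* e.g. ZONE_NORMAL */

		/* Old test: the behaviour bits make nearly every mask "pass". */
		printf("gfp >= policy_zone                  -> %d\n", gfp >= policy_zone);
		/* Fixed test: compare only the zone modifier bits. */
		printf("(gfp & GFP_ZONEMASK) >= policy_zone -> %d\n",
		       (gfp & GFP_ZONEMASK) >= policy_zone);
		return 0;
	}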
diff --git a/mm/mempool.c b/mm/mempool.c
index b014ffeaa41..c9f3d462042 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -198,31 +198,22 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
void *element;
unsigned long flags;
DEFINE_WAIT(wait);
- int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+ int gfp_temp;
might_sleep_if(gfp_mask & __GFP_WAIT);
+
+ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+
+ gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
+
repeat_alloc:
- element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data);
+
+ element = pool->alloc(gfp_temp, pool->pool_data);
if (likely(element != NULL))
return element;
- /*
- * If the pool is less than 50% full and we can perform effective
- * page reclaim then try harder to allocate an element.
- */
- mb();
- if ((gfp_mask & __GFP_FS) && (gfp_mask != gfp_nowait) &&
- (pool->curr_nr <= pool->min_nr/2)) {
- element = pool->alloc(gfp_mask, pool->pool_data);
- if (likely(element != NULL))
- return element;
- }
-
- /*
- * Kick the VM at this point.
- */
- wakeup_bdflush(0);
-
spin_lock_irqsave(&pool->lock, flags);
if (likely(pool->curr_nr)) {
element = remove_element(pool);
@@ -235,8 +226,10 @@ repeat_alloc:
if (!(gfp_mask & __GFP_WAIT))
return NULL;
+ /* Now start performing page reclaim */
+ gfp_temp = gfp_mask;
prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
- mb();
+ smp_mb();
if (!pool->curr_nr)
io_schedule();
finish_wait(&pool->wait, &wait);
@@ -257,7 +250,7 @@ void mempool_free(void *element, mempool_t *pool)
{
unsigned long flags;
- mb();
+ smp_mb();
if (pool->curr_nr < pool->min_nr) {
spin_lock_irqsave(&pool->lock, flags);
if (pool->curr_nr < pool->min_nr) {
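[Editor's note] The rewritten mempool_alloc() orders its attempts as: a cheap non-sleeping allocation first, then the pool's own reserved elements, and only then a sleeping retry if the caller allowed __GFP_WAIT, with __GFP_NOMEMALLOC/__GFP_NORETRY/__GFP_NOWARN forced on throughout. A userspace sketch of that ordering, assuming mock flag values and a fake backing allocator (the real code also waits on pool->wait, omitted here):

	#include <stdio.h>
	#include <stdlib.h>

	#define GFP_WAIT	0x1u
	#define GFP_NOMEMALLOC	0x2u
	#define GFP_NORETRY	0x4u
	#define GFP_NOWARN	0x8u

	struct pool {
		void *elements[4];
		int curr_nr;
	};

	/* Stand-in for pool->alloc(): pretend memory is tight, so only
	 * attempts that are allowed to sleep succeed. */
	static void *backing_alloc(unsigned int flags)
	{
		return (flags & GFP_WAIT) ? malloc(32) : NULL;
	}

	static void *pool_alloc(struct pool *pool, unsigned int gfp_mask)
	{
		unsigned int gfp_temp;
		void *element;

		/* Never touch emergency reserves, never loop, never warn. */
		gfp_mask |= GFP_NOMEMALLOC | GFP_NORETRY | GFP_NOWARN;

		/* First pass: no sleeping, whatever the caller allowed. */
		gfp_temp = gfp_mask & ~GFP_WAIT;

		for (;;) {
			element = backing_alloc(gfp_temp);
			if (element) {
				printf("allocated from backing allocator\n");
				return element;
			}
			if (pool->curr_nr) {		/* fall back to the reserve */
				printf("took a reserved element\n");
				return pool->elements[--pool->curr_nr];
			}
			if (!(gfp_mask & GFP_WAIT))	/* caller cannot sleep: give up */
				return NULL;
			gfp_temp = gfp_mask;		/* now allow reclaim and retry */
		}
	}

	int main(void)
	{
		struct pool pool = { .curr_nr = 1 };

		pool.elements[0] = malloc(32);
		free(pool_alloc(&pool, GFP_WAIT));	/* reserve element used first */
		free(pool_alloc(&pool, GFP_WAIT));	/* reserve empty: sleeping retry */
		return 0;
	}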
diff --git a/mm/mmap.c b/mm/mmap.c
index 6ea204cc751..01f9793591f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -937,9 +937,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
@@ -1009,8 +1010,7 @@ munmap_back:
}
/* Check against address space limit. */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (accountable && (!(flags & MAP_NORESERVE) ||
@@ -1421,7 +1421,7 @@ static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, un
struct rlimit *rlim = current->signal->rlim;
/* address space limit tests */
- if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+ if (!may_expand_vm(mm, grow))
return -ENOMEM;
/* Stack limit test */
@@ -1823,9 +1823,10 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
*/
if (mm->def_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
@@ -1848,8 +1849,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
}
/* Check against address space limits *after* clearing old maps... */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
@@ -2019,3 +2019,19 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
}
return new_vma;
}
+
+/*
+ * Return true if the calling process may expand its vm space by the passed
+ * number of pages
+ */
+int may_expand_vm(struct mm_struct *mm, unsigned long npages)
+{
+ unsigned long cur = mm->total_vm; /* pages */
+ unsigned long lim;
+
+ lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+
+ if (cur + npages > lim)
+ return 0;
+ return 1;
+}
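[Editor's note] may_expand_vm() converts both current usage and RLIMIT_AS to pages before comparing, and the mlock checks above are likewise moved to page units, so a large byte count cannot overflow the comparison. A userspace sketch of the same page-based check against the real RLIMIT_AS; the current page count here is just a made-up number:

	#include <stdio.h>
	#include <sys/resource.h>
	#include <unistd.h>

	static int may_expand_vm(unsigned long cur_pages, unsigned long npages)
	{
		struct rlimit as;
		unsigned long lim_pages;

		if (getrlimit(RLIMIT_AS, &as) != 0)
			return 0;
		if (as.rlim_cur == RLIM_INFINITY)
			return 1;

		/* Compare in pages, as the kernel helper does. */
		lim_pages = as.rlim_cur / (unsigned long)sysconf(_SC_PAGESIZE);
		return cur_pages + npages <= lim_pages;
	}

	int main(void)
	{
		unsigned long cur = 10000;	/* pretend 10000 pages are mapped */
		unsigned long want = 256;	/* and 256 more are requested */

		printf("expansion %s\n",
		       may_expand_vm(cur, want) ? "allowed"
						 : "denied (would exceed RLIMIT_AS)");
		return 0;
	}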
diff --git a/mm/mremap.c b/mm/mremap.c
index 0d1c1b9c7a0..0dd7ace94e5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -347,10 +347,10 @@ unsigned long do_mremap(unsigned long addr,
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
goto out;
}
- ret = -ENOMEM;
- if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(current->mm, (new_len - old_len) >> PAGE_SHIFT)) {
+ ret = -ENOMEM;
goto out;
+ }
if (vma->vm_flags & VM_ACCOUNT) {
charged = (new_len - old_len) >> PAGE_SHIFT;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6ddd6a29c73..613b99a5591 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -255,7 +255,7 @@ static void balance_dirty_pages(struct address_space *mapping)
/**
* balance_dirty_pages_ratelimited - balance dirty memory state
- * @mapping - address_space which was dirtied
+ * @mapping: address_space which was dirtied
*
* Processes which are dirtying memory should call in here once for each page
* which was newly dirtied. The function will periodically check the system's
@@ -562,8 +562,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
/**
* write_one_page - write out a single page and optionally wait on I/O
*
- * @page - the page to write
- * @wait - if true, wait on writeout
+ * @page: the page to write
+ * @wait: if true, wait on writeout
*
* The page must be locked by the caller and will be unlocked upon return.
*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c73dbbc1cd8..b1061b1962f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -43,7 +43,9 @@
* initializer cleaner
*/
nodemask_t node_online_map = { { [0] = 1UL } };
+EXPORT_SYMBOL(node_online_map);
nodemask_t node_possible_map = NODE_MASK_ALL;
+EXPORT_SYMBOL(node_possible_map);
struct pglist_data *pgdat_list;
unsigned long totalram_pages;
unsigned long totalhigh_pages;
@@ -799,14 +801,18 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
}
/* This allocation should allow future memory freeing. */
- if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
- /* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!cpuset_zone_allowed(z))
- continue;
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+
+ if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+ && !in_interrupt()) {
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; (z = zones[i]) != NULL; i++) {
+ if (!cpuset_zone_allowed(z))
+ continue;
+ page = buffered_rmqueue(z, order, gfp_mask);
+ if (page)
+ goto got_pg;
+ }
}
goto nopage;
}
@@ -1351,8 +1357,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
#define MAX_NODE_LOAD (num_online_nodes())
static int __initdata node_load[MAX_NUMNODES];
/**
- * find_next_best_node - find the next node that should appear in a given
- * node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
* @node: node whose fallback list we're appending
* @used_node_mask: nodemask_t of already used nodes
*
@@ -1671,6 +1676,18 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
if (batch < 1)
batch = 1;
+ /*
+ * Clamp the batch to a 2^n - 1 value. Having a power
+ * of 2 value was found to be more likely to have
+ * suboptimal cache aliasing properties in some cases.
+ *
+ * For example if 2 tasks are alternately allocating
+ * batches of pages, one task can end up with a lot
+ * of pages of one half of the possible page colors
+ * and the other with pages of the other colors.
+ */
+ batch = (1 << fls(batch + batch/2)) - 1;
+
for (cpu = 0; cpu < NR_CPUS; cpu++) {
struct per_cpu_pages *pcp;
@@ -1881,6 +1898,7 @@ static char *vmstat_text[] = {
"allocstall",
"pgrotated",
+ "nr_bounce",
};
static void *vmstat_start(struct seq_file *m, loff_t *pos)
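[Editor's note] The per-cpu batch clamp added above rounds the batch to a (2^n - 1) value so two tasks allocating alternately do not each end up with only half of the page colors. A quick userspace check of what the expression produces; fls() here is a portable stand-in for the kernel helper:

	#include <stdio.h>

	static int fls(unsigned int x)		/* index of highest set bit, 1-based */
	{
		int r = 0;

		while (x) {
			x >>= 1;
			r++;
		}
		return r;
	}

	int main(void)
	{
		int batch;

		for (batch = 1; batch <= 64; batch *= 2)
			printf("batch %2d -> clamped to %3d\n",
			       batch, (1 << fls(batch + batch / 2)) - 1);
		return 0;
	}

Power-of-2 batches (2, 4, 8, ...) come out as 3, 7, 15, ..., i.e. one below the next power of two.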
diff --git a/mm/rmap.c b/mm/rmap.c
index 884d6d1928b..378de234c12 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -243,6 +243,42 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
}
/*
+ * Check that @page is mapped at @address into @mm.
+ *
+ * On success returns with mapped pte and locked mm->page_table_lock.
+ */
+static pte_t *page_check_address(struct page *page, struct mm_struct *mm,
+ unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /*
+ * We need the page_table_lock to protect us from page faults,
+ * munmap, fork, etc...
+ */
+ spin_lock(&mm->page_table_lock);
+ pgd = pgd_offset(mm, address);
+ if (likely(pgd_present(*pgd))) {
+ pud = pud_offset(pgd, address);
+ if (likely(pud_present(*pud))) {
+ pmd = pmd_offset(pud, address);
+ if (likely(pmd_present(*pmd))) {
+ pte = pte_offset_map(pmd, address);
+ if (likely(pte_present(*pte) &&
+ page_to_pfn(page) == pte_pfn(*pte)))
+ return pte;
+ pte_unmap(pte);
+ }
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+ return ERR_PTR(-ENOENT);
+}
+
+/*
* Subfunctions of page_referenced: page_referenced_one called
* repeatedly from either page_referenced_anon or page_referenced_file.
*/
@@ -251,9 +287,6 @@ static int page_referenced_one(struct page *page,
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
int referenced = 0;
@@ -263,39 +296,18 @@ static int page_referenced_one(struct page *page,
if (address == -EFAULT)
goto out;
- spin_lock(&mm->page_table_lock);
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- goto out_unlock;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- goto out_unlock;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- goto out_unlock;
-
- pte = pte_offset_map(pmd, address);
- if (!pte_present(*pte))
- goto out_unmap;
-
- if (page_to_pfn(page) != pte_pfn(*pte))
- goto out_unmap;
-
- if (ptep_clear_flush_young(vma, address, pte))
- referenced++;
-
- if (mm != current->mm && !ignore_token && has_swap_token(mm))
- referenced++;
+ pte = page_check_address(page, mm, address);
+ if (!IS_ERR(pte)) {
+ if (ptep_clear_flush_young(vma, address, pte))
+ referenced++;
- (*mapcount)--;
+ if (mm != current->mm && !ignore_token && has_swap_token(mm))
+ referenced++;
-out_unmap:
- pte_unmap(pte);
-out_unlock:
- spin_unlock(&mm->page_table_lock);
+ (*mapcount)--;
+ pte_unmap(pte);
+ spin_unlock(&mm->page_table_lock);
+ }
out:
return referenced;
}
@@ -502,9 +514,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
pte_t pteval;
int ret = SWAP_AGAIN;
@@ -515,30 +524,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
if (address == -EFAULT)
goto out;
- /*
- * We need the page_table_lock to protect us from page faults,
- * munmap, fork, etc...
- */
- spin_lock(&mm->page_table_lock);
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- goto out_unlock;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- goto out_unlock;
-
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- goto out_unlock;
-
- pte = pte_offset_map(pmd, address);
- if (!pte_present(*pte))
- goto out_unmap;
-
- if (page_to_pfn(page) != pte_pfn(*pte))
- goto out_unmap;
+ pte = page_check_address(page, mm, address);
+ if (IS_ERR(pte))
+ goto out;
/*
* If the page is mlock()d, we cannot swap it out.
@@ -604,7 +592,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
out_unmap:
pte_unmap(pte);
-out_unlock:
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -708,7 +695,6 @@ static void try_to_unmap_cluster(unsigned long cursor,
}
pte_unmap(pte);
-
out_unlock:
spin_unlock(&mm->page_table_lock);
}
@@ -860,3 +846,4 @@ int try_to_unmap(struct page *page)
ret = SWAP_SUCCESS;
return ret;
}
+
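[Editor's note] The new page_check_address() helper replaces two duplicated pgd/pud/pmd/pte walks and signals failure with ERR_PTR(-ENOENT), which callers test with IS_ERR(). A minimal userspace re-implementation of that pointer/errno encoding (the kernel's versions live in include/linux/err.h):

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO	4095

	static inline void *ERR_PTR(long error)
	{
		return (void *)error;
	}

	static inline long PTR_ERR(const void *ptr)
	{
		return (long)ptr;
	}

	static inline int IS_ERR(const void *ptr)
	{
		/* Errors are the last 4095 values of the address space. */
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	static void *lookup(int ok)
	{
		static int value = 42;

		return ok ? (void *)&value : ERR_PTR(-ENOENT);
	}

	int main(void)
	{
		void *p = lookup(0);

		if (IS_ERR(p))
			printf("lookup failed: errno %ld\n", -PTR_ERR(p));

		p = lookup(1);
		if (!IS_ERR(p))
			printf("lookup found %d\n", *(int *)p);
		return 0;
	}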
diff --git a/mm/slab.c b/mm/slab.c
index ec660d85ddd..84074264115 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -583,7 +583,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
return cachep->array[smp_processor_id()];
}
-static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, int gfpflags)
{
struct cache_sizes *csizep = malloc_sizes;
@@ -607,6 +607,12 @@ static inline kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
return csizep->cs_cachep;
}
+kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags)
+{
+ return __find_general_cachep(size, gfpflags);
+}
+EXPORT_SYMBOL(kmem_find_general_cachep);
+
/* Cal the num objs, wastage, and bytes left over for a given slab size. */
static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
int flags, size_t *left_over, unsigned int *num)
@@ -672,14 +678,11 @@ static struct array_cache *alloc_arraycache(int cpu, int entries,
int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
struct array_cache *nc = NULL;
- if (cpu != -1) {
- kmem_cache_t *cachep;
- cachep = kmem_find_general_cachep(memsize, GFP_KERNEL);
- if (cachep)
- nc = kmem_cache_alloc_node(cachep, cpu_to_node(cpu));
- }
- if (!nc)
+ if (cpu == -1)
nc = kmalloc(memsize, GFP_KERNEL);
+ else
+ nc = kmalloc_node(memsize, GFP_KERNEL, cpu_to_node(cpu));
+
if (nc) {
nc->avail = 0;
nc->limit = entries;
@@ -1663,7 +1666,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
}
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
- synchronize_kernel();
+ synchronize_rcu();
/* no cpu_online check required here since we clear the percpu
* array on cpu offline and set this to NULL.
@@ -2361,7 +2364,7 @@ out:
* and can sleep. And it will allocate memory on the given node, which
* can improve the performance for cpu bound structures.
*/
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int nodeid)
{
int loop;
void *objp;
@@ -2393,7 +2396,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
spin_unlock_irq(&cachep->spinlock);
local_irq_disable();
- if (!cache_grow(cachep, GFP_KERNEL, nodeid)) {
+ if (!cache_grow(cachep, flags, nodeid)) {
local_irq_enable();
return NULL;
}
@@ -2435,6 +2438,16 @@ got_slabp:
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+void *kmalloc_node(size_t size, int flags, int node)
+{
+ kmem_cache_t *cachep;
+
+ cachep = kmem_find_general_cachep(size, flags);
+ if (unlikely(cachep == NULL))
+ return NULL;
+ return kmem_cache_alloc_node(cachep, flags, node);
+}
+EXPORT_SYMBOL(kmalloc_node);
#endif
/**
@@ -2462,7 +2475,12 @@ void *__kmalloc(size_t size, unsigned int __nocast flags)
{
kmem_cache_t *cachep;
- cachep = kmem_find_general_cachep(size, flags);
+ /* If you want to save a few bytes .text space: replace
+ * __ with kmem_.
+ * Then kmalloc uses the uninlined functions instead of the inline
+ * functions.
+ */
+ cachep = __find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
return __cache_alloc(cachep, flags);
@@ -2489,9 +2507,8 @@ void *__alloc_percpu(size_t size, size_t align)
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i))
continue;
- pdata->ptrs[i] = kmem_cache_alloc_node(
- kmem_find_general_cachep(size, GFP_KERNEL),
- cpu_to_node(i));
+ pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(i));
if (!pdata->ptrs[i])
goto unwind_oom;
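[Editor's note] kmalloc_node() above is a thin wrapper: find the general size-class cache that fits the request, then allocate from it on the requested node. A userspace sketch of the size-class lookup half, with illustrative class sizes rather than the real malloc_sizes table:

	#include <stddef.h>
	#include <stdio.h>

	struct cache_size {
		size_t cs_size;
		const char *name;
	};

	static const struct cache_size malloc_sizes[] = {
		{ 32, "size-32" }, { 64, "size-64" }, { 128, "size-128" },
		{ 256, "size-256" }, { 512, "size-512" }, { 1024, "size-1024" },
		{ 0, NULL }				/* terminator */
	};

	/* Walk the table until a class is at least as large as the request. */
	static const struct cache_size *find_general_cachep(size_t size)
	{
		const struct cache_size *csizep = malloc_sizes;

		while (csizep->cs_size && size > csizep->cs_size)
			csizep++;
		return csizep->cs_size ? csizep : NULL;
	}

	int main(void)
	{
		size_t want = 100;
		const struct cache_size *c = find_general_cachep(want);

		if (c)
			printf("%zu-byte request served from %s\n", want, c->name);
		return 0;
	}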
diff --git a/mm/swap_state.c b/mm/swap_state.c
index a063a902ed0..4f251775ef9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -143,7 +143,6 @@ void __delete_from_swap_cache(struct page *page)
int add_to_swap(struct page * page)
{
swp_entry_t entry;
- int pf_flags;
int err;
if (!PageLocked(page))
@@ -154,29 +153,19 @@ int add_to_swap(struct page * page)
if (!entry.val)
return 0;
- /* Radix-tree node allocations are performing
- * GFP_ATOMIC allocations under PF_MEMALLOC.
- * They can completely exhaust the page allocator.
- *
- * So PF_MEMALLOC is dropped here. This causes the slab
- * allocations to fail earlier, so radix-tree nodes will
- * then be allocated from the mempool reserves.
+ /*
+ * Radix-tree node allocations from PF_MEMALLOC contexts could
+ * completely exhaust the page allocator. __GFP_NOMEMALLOC
+ * stops emergency reserves from being allocated.
*
- * We're still using __GFP_HIGH for radix-tree node
- * allocations, so some of the emergency pools are available,
- * just not all of them.
+ * TODO: this could cause a theoretical memory reclaim
+ * deadlock in the swap out path.
*/
-
- pf_flags = current->flags;
- current->flags &= ~PF_MEMALLOC;
-
/*
* Add it to the swap cache and mark it dirty
*/
- err = __add_to_swap_cache(page, entry, GFP_ATOMIC|__GFP_NOWARN);
-
- if (pf_flags & PF_MEMALLOC)
- current->flags |= PF_MEMALLOC;
+ err = __add_to_swap_cache(page, entry,
+ GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN);
switch (err) {
case 0: /* Success */
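[Editor's note] The swap_state.c hunk trades the "save PF_MEMALLOC, clear it, restore it" dance for a per-allocation __GFP_NOMEMALLOC flag. A toy comparison of the two styles; the task flag, gfp bit values and the allocator predicate are all illustrative:

	#include <stdio.h>

	#define PF_MEMALLOC		0x1u
	#define __GFP_NOMEMALLOC	0x2u

	static unsigned int current_flags = PF_MEMALLOC;

	static int may_dip_into_reserves(unsigned int gfp)
	{
		return (current_flags & PF_MEMALLOC) && !(gfp & __GFP_NOMEMALLOC);
	}

	int main(void)
	{
		unsigned int pf_flags, gfp = 0;

		/* Old style: temporarily drop the task-wide flag around the call. */
		pf_flags = current_flags;
		current_flags &= ~PF_MEMALLOC;
		printf("old style: reserves %s\n",
		       may_dip_into_reserves(gfp) ? "available" : "off limits");
		if (pf_flags & PF_MEMALLOC)
			current_flags |= PF_MEMALLOC;

		/* New style: say it in the allocation mask itself. */
		printf("new style: reserves %s\n",
		       may_dip_into_reserves(gfp | __GFP_NOMEMALLOC)
		       ? "available" : "off limits");
		return 0;
	}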
diff --git a/mm/truncate.c b/mm/truncate.c
index c9a63f0b69a..60c8764bfac 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -242,7 +242,7 @@ EXPORT_SYMBOL(invalidate_inode_pages);
/**
* invalidate_inode_pages2_range - remove range of pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
* @start: the page offset 'from' which to invalidate
* @end: the page offset 'to' which to invalidate (inclusive)
*
@@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
/**
* invalidate_inode_pages2 - remove all pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
*
* Any pages which are found to be mapped into pagetables are unmapped prior to
* invalidation.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c6182f6f130..2bd83e5c2bb 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -475,6 +475,10 @@ void *vmalloc(unsigned long size)
EXPORT_SYMBOL(vmalloc);
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
/**
* vmalloc_exec - allocate virtually contiguous, executable memory
*
@@ -488,10 +492,6 @@ EXPORT_SYMBOL(vmalloc);
* use __vmalloc() instead.
*/
-#ifndef PAGE_KERNEL_EXEC
-# define PAGE_KERNEL_EXEC PAGE_KERNEL
-#endif
-
void *vmalloc_exec(unsigned long size)
{
return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
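[Editor's note] The vmalloc.c change only moves the PAGE_KERNEL_EXEC fallback define out of the kernel-doc comment block; the idiom itself is the usual "define it only if the architecture did not". A trivial userspace sketch of the shape, with string stand-ins for the protection values:

	#include <stdio.h>

	#define PAGE_KERNEL "rw-"		/* stand-in for the arch's default protection */
	/* #define PAGE_KERNEL_EXEC "rwx" */	/* an arch with a separate exec protection defines this */

	#ifndef PAGE_KERNEL_EXEC
	# define PAGE_KERNEL_EXEC PAGE_KERNEL	/* otherwise fall back to PAGE_KERNEL */
	#endif

	int main(void)
	{
		printf("vmalloc_exec() would map with %s\n", PAGE_KERNEL_EXEC);
		return 0;
	}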