summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/allocpercpu.c2
-rw-r--r--mm/backing-dev.c10
-rw-r--r--mm/failslab.c1
-rw-r--r--mm/filemap.c24
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/migrate.c10
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/pdflush.c47
-rw-r--r--mm/quicklist.c2
-rw-r--r--mm/readahead.c40
-rw-r--r--mm/shmem.c27
-rw-r--r--mm/slab.c71
-rw-r--r--mm/slob.c31
-rw-r--r--mm/slub.c77
-rw-r--r--mm/swap.c4
-rw-r--r--mm/truncate.c10
-rw-r--r--mm/util.c32
-rw-r--r--mm/vmscan.c12
20 files changed, 344 insertions, 70 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b53427ad30a..57971d2ab84 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -213,6 +213,8 @@ config UNEVICTABLE_LRU
will use one page flag and increase the code size a little,
say Y unless you know what you are doing.
+ See Documentation/vm/unevictable-lru.txt for more information.
+
config HAVE_MLOCK
bool
default y if MMU=y
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 139d5b7b662..dfdee6a4735 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -31,7 +31,7 @@ static void percpu_depopulate(void *__pdata, int cpu)
* @__pdata: per-cpu data to depopulate
* @mask: depopulate per-cpu data for cpu's selected through mask bits
*/
-static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask)
{
int cpu;
for_each_cpu_mask_nr(cpu, *mask)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index be68c956a66..493b468a503 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -284,12 +284,12 @@ static wait_queue_head_t congestion_wqh[2] = {
};
-void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
+void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
enum bdi_state bit;
- wait_queue_head_t *wqh = &congestion_wqh[rw];
+ wait_queue_head_t *wqh = &congestion_wqh[sync];
- bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+ bit = sync ? BDI_sync_congested : BDI_async_congested;
clear_bit(bit, &bdi->state);
smp_mb__after_clear_bit();
if (waitqueue_active(wqh))
@@ -297,11 +297,11 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
}
EXPORT_SYMBOL(clear_bdi_congested);
-void set_bdi_congested(struct backing_dev_info *bdi, int rw)
+void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
enum bdi_state bit;
- bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+ bit = sync ? BDI_sync_congested : BDI_async_congested;
set_bit(bit, &bdi->state);
}
EXPORT_SYMBOL(set_bdi_congested);
diff --git a/mm/failslab.c b/mm/failslab.c
index 7c6ea6493f8..9339de5f0a9 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -1,4 +1,5 @@
#include <linux/fault-inject.h>
+#include <linux/gfp.h>
static struct {
struct fault_attr attr;
diff --git a/mm/filemap.c b/mm/filemap.c
index 126d3973b3d..379ff0bcbf6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -441,6 +441,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
}
return err;
}
+EXPORT_SYMBOL(filemap_write_and_wait_range);
/**
* add_to_page_cache_locked - add a locked page to the pagecache
@@ -513,6 +514,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
}
return ret;
}
+EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
@@ -565,6 +567,24 @@ void wait_on_page_bit(struct page *page, int bit_nr)
EXPORT_SYMBOL(wait_on_page_bit);
/**
+ * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
+ *
+ * Add an arbitrary @waiter to the wait queue for the nominated @page.
+ */
+void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+{
+ wait_queue_head_t *q = page_waitqueue(page);
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->lock, flags);
+ __add_wait_queue(q, waiter);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL_GPL(add_page_wait_queue);
+
+/**
* unlock_page - unlock a locked page
* @page: the page
*
@@ -627,6 +647,7 @@ int __lock_page_killable(struct page *page)
return __wait_on_bit_lock(page_waitqueue(page), &wait,
sync_page_killable, TASK_KILLABLE);
}
+EXPORT_SYMBOL_GPL(__lock_page_killable);
/**
* __lock_page_nosync - get a lock on the page, without calling sync_page()
@@ -2463,6 +2484,9 @@ EXPORT_SYMBOL(generic_file_aio_write);
* (presumably at page->private). If the release was successful, return `1'.
* Otherwise return zero.
*
+ * This may also be called if PG_fscache is set on a page, indicating that the
+ * page is known to the local caching routines.
+ *
* The @gfp_mask argument specifies whether I/O may be performed to release
* this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fc6d6c4823..e44fb0fbb80 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
if (unlikely(!mem))
return 0;
- VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+ VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
while (1) {
int ret;
diff --git a/mm/migrate.c b/mm/migrate.c
index a9eff3f092f..068655d8f88 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -250,7 +250,7 @@ out:
* The number of remaining references must be:
* 1 for anonymous pages without a mapping
* 2 for pages with a mapping
- * 3 for pages with a mapping and PagePrivate set.
+ * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
*/
static int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page)
@@ -270,7 +270,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
- expected_count = 2 + !!PagePrivate(page);
+ expected_count = 2 + !!page_has_private(page);
if (page_count(page) != expected_count ||
(struct page *)radix_tree_deref_slot(pslot) != page) {
spin_unlock_irq(&mapping->tree_lock);
@@ -386,7 +386,7 @@ EXPORT_SYMBOL(fail_migrate_page);
/*
* Common logic to directly migrate a single page suitable for
- * pages that do not use PagePrivate.
+ * pages that do not use PagePrivate/PagePrivate2.
*
* Pages are locked upon entry and exit.
*/
@@ -522,7 +522,7 @@ static int fallback_migrate_page(struct address_space *mapping,
* Buffers may be managed in a filesystem specific way.
* We must have no buffers or drop them.
*/
- if (PagePrivate(page) &&
+ if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
@@ -655,7 +655,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
* free the metadata, so the page can be freed.
*/
if (!page->mapping) {
- if (!PageAnon(page) && PagePrivate(page)) {
+ if (!PageAnon(page) && page_has_private(page)) {
/*
* Go direct to try_to_free_buffers() here because
* a) that's what try_to_release_page() would do anyway
diff --git a/mm/mmap.c b/mm/mmap.c
index 4a3841186c1..3303d1ba8e8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1575,7 +1575,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
* Overcommit.. This must be the final test, as it will
* update security statistics.
*/
- if (security_vm_enough_memory(grow))
+ if (security_vm_enough_memory_mm(mm, grow))
return -ENOMEM;
/* Ok, everything looks good - let it rip */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0284e528748..e2f26991fff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -331,7 +331,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- if (unlikely(!PageTail(p) | (p->first_page != page))) {
+ if (unlikely(!PageTail(p) || (p->first_page != page))) {
bad_page(page);
bad++;
}
@@ -2128,7 +2128,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
int n, val;
int min_val = INT_MAX;
int best_node = -1;
- node_to_cpumask_ptr(tmp, 0);
+ const struct cpumask *tmp = cpumask_of_node(0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
@@ -2149,8 +2149,8 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
val += (n < node);
/* Give preference to headless and unused nodes */
- node_to_cpumask_ptr_next(tmp, n);
- if (!cpus_empty(*tmp))
+ tmp = cpumask_of_node(n);
+ if (!cpumask_empty(tmp))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 118905e3d78..f2caf96993f 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -58,6 +58,14 @@ static DEFINE_SPINLOCK(pdflush_lock);
int nr_pdflush_threads = 0;
/*
+ * The max/min number of pdflush threads. R/W by sysctl at
+ * /proc/sys/vm/nr_pdflush_threads_max/min
+ */
+int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS;
+int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS;
+
+
+/*
* The time at which the pdflush thread pool last went empty
*/
static unsigned long last_empty_jifs;
@@ -68,7 +76,7 @@ static unsigned long last_empty_jifs;
* Thread pool management algorithm:
*
* - The minimum and maximum number of pdflush instances are bound
- * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
+ * by nr_pdflush_threads_min and nr_pdflush_threads_max.
*
* - If there have been no idle pdflush instances for 1 second, create
* a new one.
@@ -98,7 +106,6 @@ static int __pdflush(struct pdflush_work *my_work)
INIT_LIST_HEAD(&my_work->list);
spin_lock_irq(&pdflush_lock);
- nr_pdflush_threads++;
for ( ; ; ) {
struct pdflush_work *pdf;
@@ -126,20 +133,25 @@ static int __pdflush(struct pdflush_work *my_work)
(*my_work->fn)(my_work->arg0);
+ spin_lock_irq(&pdflush_lock);
+
/*
* Thread creation: For how long have there been zero
* available threads?
+ *
+ * To throttle creation, we reset last_empty_jifs.
*/
if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
- /* unlocked list_empty() test is OK here */
- if (list_empty(&pdflush_list)) {
- /* unlocked test is OK here */
- if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
- start_one_pdflush_thread();
+ if (list_empty(&pdflush_list) &&
+ nr_pdflush_threads < nr_pdflush_threads_max) {
+ last_empty_jifs = jiffies;
+ nr_pdflush_threads++;
+ spin_unlock_irq(&pdflush_lock);
+ start_one_pdflush_thread();
+ spin_lock_irq(&pdflush_lock);
}
}
- spin_lock_irq(&pdflush_lock);
my_work->fn = NULL;
/*
@@ -148,7 +160,7 @@ static int __pdflush(struct pdflush_work *my_work)
*/
if (list_empty(&pdflush_list))
continue;
- if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
+ if (nr_pdflush_threads <= nr_pdflush_threads_min)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
@@ -236,14 +248,27 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
static void start_one_pdflush_thread(void)
{
- kthread_run(pdflush, NULL, "pdflush");
+ struct task_struct *k;
+
+ k = kthread_run(pdflush, NULL, "pdflush");
+ if (unlikely(IS_ERR(k))) {
+ spin_lock_irq(&pdflush_lock);
+ nr_pdflush_threads--;
+ spin_unlock_irq(&pdflush_lock);
+ }
}
static int __init pdflush_init(void)
{
int i;
- for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
+ /*
+ * Pre-set nr_pdflush_threads... If we fail to create,
+ * the count will be decremented.
+ */
+ nr_pdflush_threads = nr_pdflush_threads_min;
+
+ for (i = 0; i < nr_pdflush_threads_min; i++)
start_one_pdflush_thread();
return 0;
}
diff --git a/mm/quicklist.c b/mm/quicklist.c
index 8dbb6805ef3..e66d07d1b4f 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -29,7 +29,7 @@ static unsigned long max_pages(unsigned long min_pages)
int node = numa_node_id();
struct zone *zones = NODE_DATA(node)->node_zones;
int num_cpus_on_node;
- node_to_cpumask_ptr(cpumask_on_node, node);
+ const struct cpumask *cpumask_on_node = cpumask_of_node(node);
node_free_pages =
#ifdef CONFIG_ZONE_DMA
diff --git a/mm/readahead.c b/mm/readahead.c
index 9ce303d4b81..133b6d52551 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -31,6 +31,42 @@ EXPORT_SYMBOL_GPL(file_ra_state_init);
#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
+/*
+ * see if a page needs releasing upon read_cache_pages() failure
+ * - the caller of read_cache_pages() may have set PG_private or PG_fscache
+ * before calling, such as the NFS fs marking pages that are cached locally
+ * on disk, thus we need to give the fs a chance to clean up in the event of
+ * an error
+ */
+static void read_cache_pages_invalidate_page(struct address_space *mapping,
+ struct page *page)
+{
+ if (page_has_private(page)) {
+ if (!trylock_page(page))
+ BUG();
+ page->mapping = mapping;
+ do_invalidatepage(page, 0);
+ page->mapping = NULL;
+ unlock_page(page);
+ }
+ page_cache_release(page);
+}
+
+/*
+ * release a list of pages, invalidating them first if need be
+ */
+static void read_cache_pages_invalidate_pages(struct address_space *mapping,
+ struct list_head *pages)
+{
+ struct page *victim;
+
+ while (!list_empty(pages)) {
+ victim = list_to_page(pages);
+ list_del(&victim->lru);
+ read_cache_pages_invalidate_page(mapping, victim);
+ }
+}
+
/**
* read_cache_pages - populate an address space with some pages & start reads against them
* @mapping: the address_space
@@ -52,14 +88,14 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index, GFP_KERNEL)) {
- page_cache_release(page);
+ read_cache_pages_invalidate_page(mapping, page);
continue;
}
page_cache_release(page);
ret = filler(data, page);
if (unlikely(ret)) {
- put_pages_list(pages);
+ read_cache_pages_invalidate_pages(mapping, pages);
break;
}
task_io_account_read(PAGE_CACHE_SIZE);
diff --git a/mm/shmem.c b/mm/shmem.c
index d94d2e9146b..f9cb20ebb99 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -24,6 +24,7 @@
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
+#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt;
#include <linux/exportfs.h>
#include <linux/generic_acl.h>
#include <linux/mman.h>
-#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt;
#include <asm/div64.h>
#include <asm/pgtable.h>
+/*
+ * The maximum size of a shmem/tmpfs file is limited by the maximum size of
+ * its triple-indirect swap vector - see illustration at shmem_swp_entry().
+ *
+ * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
+ * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
+ * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
+ * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
+ *
+ * We use / and * instead of shifts in the definitions below, so that the swap
+ * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
+ */
#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
+#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
+#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
+#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
+
+#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
@@ -2581,7 +2596,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
#define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
-#define SHMEM_MAX_BYTES LLONG_MAX
+#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE
#endif /* CONFIG_SHMEM */
diff --git a/mm/slab.c b/mm/slab.c
index 208323fd37b..9a90b00d2f9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <trace/kmemtrace.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
+#ifdef CONFIG_KMEMTRACE
+size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return cachep->buffer_size;
+}
+EXPORT_SYMBOL(slab_buffer_size);
+#endif
+
/*
* Do not go above this order unless 0 objects fit into the slab.
*/
@@ -1160,7 +1169,7 @@ static void __cpuinit cpuup_canceled(long cpu)
struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu);
- node_to_cpumask_ptr(mask, node);
+ const struct cpumask *mask = cpumask_of_node(node);
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
@@ -3554,10 +3563,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+ trace_kmem_cache_alloc(_RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+{
+ return __cache_alloc(cachep, flags, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
/**
* kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
* @cachep: the cache we're checking against
@@ -3602,23 +3624,46 @@ out:
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- return __cache_alloc_node(cachep, flags, nodeid,
- __builtin_return_address(0));
+ void *ret = __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+
+ trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size,
+ flags, nodeid);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return kmem_cache_alloc_node(cachep, flags, node);
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ trace_kmalloc_node((unsigned long) caller, ret,
+ size, cachep->buffer_size, flags, node);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node,
@@ -3651,6 +3696,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
/* If you want to save a few bytes .text space: replace
* __ with kmem_.
@@ -3660,11 +3706,16 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
cachep = __find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return __cache_alloc(cachep, flags, caller);
+ ret = __cache_alloc(cachep, flags, caller);
+
+ trace_kmalloc((unsigned long) caller, ret,
+ size, cachep->buffer_size, flags);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3703,6 +3754,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
debug_check_no_obj_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);
local_irq_restore(flags);
+
+ trace_kmem_cache_free(_RET_IP_, objp);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3720,6 +3773,8 @@ void kfree(const void *objp)
struct kmem_cache *c;
unsigned long flags;
+ trace_kfree(_RET_IP_, objp);
+
if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
local_irq_save(flags);
diff --git a/mm/slob.c b/mm/slob.c
index 7a3411524da..a2d4ab32198 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include <trace/kmemtrace.h>
#include <asm/atomic.h>
/*
@@ -474,6 +475,7 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
unsigned int *m;
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+ void *ret;
lockdep_trace_alloc(gfp);
@@ -482,12 +484,16 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
return ZERO_SIZE_PTR;
m = slob_alloc(size + align, gfp, align, node);
+
if (!m)
return NULL;
*m = size;
- return (void *)m + align;
+ ret = (void *)m + align;
+
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, size + align, gfp, node);
} else {
- void *ret;
+ unsigned int order = get_order(size);
ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node);
if (ret) {
@@ -495,8 +501,12 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
page = virt_to_page(ret);
page->private = size;
}
- return ret;
+
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, PAGE_SIZE << order, gfp, node);
}
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
@@ -504,6 +514,8 @@ void kfree(const void *block)
{
struct slob_page *sp;
+ trace_kfree(_RET_IP_, block);
+
if (unlikely(ZERO_OR_NULL_PTR(block)))
return;
@@ -583,10 +595,17 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
void *b;
- if (c->size < PAGE_SIZE)
+ if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
- else
+ trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+ SLOB_UNITS(c->size) * SLOB_UNIT,
+ flags, node);
+ } else {
b = slob_new_pages(flags, get_order(c->size), node);
+ trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+ PAGE_SIZE << get_order(c->size),
+ flags, node);
+ }
if (c->ctor)
c->ctor(b);
@@ -622,6 +641,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
} else {
__kmem_cache_free(b, c->size);
}
+
+ trace_kmem_cache_free(_RET_IP_, b);
}
EXPORT_SYMBOL(kmem_cache_free);
diff --git a/mm/slub.c b/mm/slub.c
index c4ea9158c9f..7ab54ecbd3f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
@@ -1618,18 +1619,45 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- return slab_alloc(s, gfpflags, -1, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+ trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- return slab_alloc(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+ trace_kmem_cache_alloc_node(_RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
/*
* Slow patch handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
@@ -1737,6 +1765,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
slab_free(s, page, x, _RET_IP_);
+
+ trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -2659,6 +2689,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, flags);
@@ -2668,7 +2699,11 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, -1, _RET_IP_);
+ ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+ trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc);
@@ -2687,16 +2722,28 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > SLUB_MAX_SIZE))
- return kmalloc_large_node(size, flags, node);
+ if (unlikely(size > SLUB_MAX_SIZE)) {
+ ret = kmalloc_large_node(size, flags, node);
+
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
+
+ return ret;
+ }
s = get_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, node, _RET_IP_);
+ ret = slab_alloc(s, flags, node, _RET_IP_);
+
+ trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
@@ -2745,6 +2792,8 @@ void kfree(const void *x)
struct page *page;
void *object = (void *)x;
+ trace_kfree(_RET_IP_, x);
+
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
@@ -3224,6 +3273,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large(size, gfpflags);
@@ -3233,13 +3283,19 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, -1, caller);
+ ret = slab_alloc(s, gfpflags, -1, caller);
+
+ /* Honor the call site pointer we recieved. */
+ trace_kmalloc(caller, ret, size, s->size, gfpflags);
+
+ return ret;
}
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
int node, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large_node(size, gfpflags, node);
@@ -3249,7 +3305,12 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, node, caller);
+ ret = slab_alloc(s, gfpflags, node, caller);
+
+ /* Honor the call site pointer we recieved. */
+ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
+
+ return ret;
}
#ifdef CONFIG_SLUB_DEBUG
diff --git a/mm/swap.c b/mm/swap.c
index 6e83084c1f6..bede23ce64e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -448,8 +448,8 @@ void pagevec_strip(struct pagevec *pvec)
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- if (PagePrivate(page) && trylock_page(page)) {
- if (PagePrivate(page))
+ if (page_has_private(page) && trylock_page(page)) {
+ if (page_has_private(page))
try_to_release_page(page, 0);
unlock_page(page);
}
diff --git a/mm/truncate.c b/mm/truncate.c
index 1229211104f..55206fab7b9 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -50,7 +50,7 @@ void do_invalidatepage(struct page *page, unsigned long offset)
static inline void truncate_partial_page(struct page *page, unsigned partial)
{
zero_user_segment(page, partial, PAGE_CACHE_SIZE);
- if (PagePrivate(page))
+ if (page_has_private(page))
do_invalidatepage(page, partial);
}
@@ -99,7 +99,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return;
- if (PagePrivate(page))
+ if (page_has_private(page))
do_invalidatepage(page, 0);
cancel_dirty_page(page, PAGE_CACHE_SIZE);
@@ -126,7 +126,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return 0;
- if (PagePrivate(page) && !try_to_release_page(page, 0))
+ if (page_has_private(page) && !try_to_release_page(page, 0))
return 0;
clear_page_mlock(page);
@@ -348,7 +348,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return 0;
- if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+ if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
return 0;
spin_lock_irq(&mapping->tree_lock);
@@ -356,7 +356,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
goto failed;
clear_page_mlock(page);
- BUG_ON(PagePrivate(page));
+ BUG_ON(page_has_private(page));
__remove_from_page_cache(page);
spin_unlock_irq(&mapping->tree_lock);
page_cache_release(page); /* pagecache ref */
diff --git a/mm/util.c b/mm/util.c
index 7c122e49f76..55bef160b9f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,6 +4,7 @@
#include <linux/module.h>
#include <linux/err.h>
#include <linux/sched.h>
+#include <linux/tracepoint.h>
#include <asm/uaccess.h>
/**
@@ -222,6 +223,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
}
#endif
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
int __attribute__((weak)) get_user_pages_fast(unsigned long start,
int nr_pages, int write, struct page **pages)
{
@@ -236,3 +253,18 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
return ret;
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
+
+/* Tracepoints definitions. */
+DEFINE_TRACE(kmalloc);
+DEFINE_TRACE(kmem_cache_alloc);
+DEFINE_TRACE(kmalloc_node);
+DEFINE_TRACE(kmem_cache_alloc_node);
+DEFINE_TRACE(kfree);
+DEFINE_TRACE(kmem_cache_free);
+
+EXPORT_TRACEPOINT_SYMBOL(kmalloc);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
+EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kfree);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06e72693b45..39fdfb14eea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -283,7 +283,7 @@ static inline int page_mapping_inuse(struct page *page)
static inline int is_page_cache_freeable(struct page *page)
{
- return page_count(page) - !!PagePrivate(page) == 2;
+ return page_count(page) - !!page_has_private(page) == 2;
}
static int may_write_to_queue(struct backing_dev_info *bdi)
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
* Some data journaling orphaned pages can have
* page->mapping == NULL while being dirty with clean buffers.
*/
- if (PagePrivate(page)) {
+ if (page_has_private(page)) {
if (try_to_free_buffers(page)) {
ClearPageDirty(page);
printk("%s: orphaned page\n", __func__);
@@ -727,7 +727,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* process address space (page_count == 1) it can be freed.
* Otherwise, leave the page on the LRU so it is swappable.
*/
- if (PagePrivate(page)) {
+ if (page_has_private(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
if (!mapping && page_count(page) == 1) {
@@ -1967,7 +1967,7 @@ static int kswapd(void *p)
struct reclaim_state reclaim_state = {
.reclaimed_slab = 0,
};
- node_to_cpumask_ptr(cpumask, pgdat->node_id);
+ const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
lockdep_set_current_reclaim_state(GFP_KERNEL);
@@ -2204,7 +2204,9 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
for_each_node_state(nid, N_HIGH_MEMORY) {
pg_data_t *pgdat = NODE_DATA(nid);
- node_to_cpumask_ptr(mask, pgdat->node_id);
+ const struct cpumask *mask;
+
+ mask = cpumask_of_node(pgdat->node_id);
if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
/* One of our CPUs online: restore mask */