summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/memcontrol.c172
-rw-r--r--mm/mempolicy.c9
-rw-r--r--mm/slub.c2
-rw-r--r--mm/vmalloc.c8
-rw-r--r--mm/vmscan.c66
-rw-r--r--mm/vmstat.c4
7 files changed, 43 insertions, 224 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 645a080ba4d..7771871fa35 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -827,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
{
unsigned int i;
unsigned int ret;
- unsigned int nr_found;
+ unsigned int nr_found, nr_skip;
rcu_read_lock();
restart:
nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
(void ***)pages, NULL, start, nr_pages);
ret = 0;
+ nr_skip = 0;
for (i = 0; i < nr_found; i++) {
struct page *page;
repeat:
@@ -856,6 +857,7 @@ repeat:
* here as an exceptional entry: so skip over it -
* we only reach this from invalidate_mapping_pages().
*/
+ nr_skip++;
continue;
}
@@ -876,7 +878,7 @@ repeat:
* If all entries were removed before we could secure them,
* try again, because callers stop trying once 0 is returned.
*/
- if (unlikely(!ret && nr_found))
+ if (unlikely(!ret && nr_found > nr_skip))
goto restart;
rcu_read_unlock();
return ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ebd1e86bef1..3508777837c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
static void mem_cgroup_threshold(struct mem_cgroup *mem);
static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
-enum {
- SCAN_BY_LIMIT,
- SCAN_BY_SYSTEM,
- NR_SCAN_CONTEXT,
- SCAN_BY_SHRINK, /* not recorded now */
-};
-
-enum {
- SCAN,
- SCAN_ANON,
- SCAN_FILE,
- ROTATE,
- ROTATE_ANON,
- ROTATE_FILE,
- FREED,
- FREED_ANON,
- FREED_FILE,
- ELAPSED,
- NR_SCANSTATS,
-};
-
-struct scanstat {
- spinlock_t lock;
- unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
- unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-};
-
-const char *scanstat_string[NR_SCANSTATS] = {
- "scanned_pages",
- "scanned_anon_pages",
- "scanned_file_pages",
- "rotated_pages",
- "rotated_anon_pages",
- "rotated_file_pages",
- "freed_pages",
- "freed_anon_pages",
- "freed_file_pages",
- "elapsed_ns",
-};
-#define SCANSTAT_WORD_LIMIT "_by_limit"
-#define SCANSTAT_WORD_SYSTEM "_by_system"
-#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
-
-
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
/* For oom notifier event fd */
struct list_head oom_notify;
- /* For recording LRU-scan statistics */
- struct scanstat scanstat;
+
/*
* Should we move charges of a task when a task is moved into this
* mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
}
#endif
-static void __mem_cgroup_record_scanstat(unsigned long *stats,
- struct memcg_scanrecord *rec)
-{
-
- stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
- stats[SCAN_ANON] += rec->nr_scanned[0];
- stats[SCAN_FILE] += rec->nr_scanned[1];
-
- stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
- stats[ROTATE_ANON] += rec->nr_rotated[0];
- stats[ROTATE_FILE] += rec->nr_rotated[1];
-
- stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
- stats[FREED_ANON] += rec->nr_freed[0];
- stats[FREED_FILE] += rec->nr_freed[1];
-
- stats[ELAPSED] += rec->elapsed;
-}
-
-static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
-{
- struct mem_cgroup *mem;
- int context = rec->context;
-
- if (context >= NR_SCAN_CONTEXT)
- return;
-
- mem = rec->mem;
- spin_lock(&mem->scanstat.lock);
- __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
- spin_unlock(&mem->scanstat.lock);
-
- mem = rec->root;
- spin_lock(&mem->scanstat.lock);
- __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
- spin_unlock(&mem->scanstat.lock);
-}
-
/*
* Scan the hierarchy if needed to reclaim memory. We remember the last child
* we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
- struct memcg_scanrecord rec;
unsigned long excess;
- unsigned long scanned;
+ unsigned long nr_scanned;
excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
if (!check_soft && !shrink && root_mem->memsw_is_minimum)
noswap = true;
- if (shrink)
- rec.context = SCAN_BY_SHRINK;
- else if (check_soft)
- rec.context = SCAN_BY_SYSTEM;
- else
- rec.context = SCAN_BY_LIMIT;
-
- rec.root = root_mem;
-
while (1) {
victim = mem_cgroup_select_victim(root_mem);
if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
css_put(&victim->css);
continue;
}
- rec.mem = victim;
- rec.nr_scanned[0] = 0;
- rec.nr_scanned[1] = 0;
- rec.nr_rotated[0] = 0;
- rec.nr_rotated[1] = 0;
- rec.nr_freed[0] = 0;
- rec.nr_freed[1] = 0;
- rec.elapsed = 0;
/* we use swappiness of local cgroup */
if (check_soft) {
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
- noswap, zone, &rec, &scanned);
- *total_scanned += scanned;
+ noswap, zone, &nr_scanned);
+ *total_scanned += nr_scanned;
} else
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
- noswap, &rec);
- mem_cgroup_record_scanstat(&rec);
+ noswap);
css_put(&victim->css);
/*
* At shrinking usage, we can't check we should stop here or
@@ -3854,18 +3752,14 @@ try_to_free:
/* try to free all pages in this cgroup */
shrink = 1;
while (nr_retries && mem->res.usage > 0) {
- struct memcg_scanrecord rec;
int progress;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
}
- rec.context = SCAN_BY_SHRINK;
- rec.mem = mem;
- rec.root = mem;
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
- false, &rec);
+ false);
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
@@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
}
#endif /* CONFIG_NUMA */
-static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct cgroup_map_cb *cb)
-{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
- char string[64];
- int i;
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_LIMIT);
- cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
- }
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_SYSTEM);
- cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
- }
-
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_LIMIT);
- strcat(string, SCANSTAT_WORD_HIERARCHY);
- cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
- }
- for (i = 0; i < NR_SCANSTATS; i++) {
- strcpy(string, scanstat_string[i]);
- strcat(string, SCANSTAT_WORD_SYSTEM);
- strcat(string, SCANSTAT_WORD_HIERARCHY);
- cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
- }
- return 0;
-}
-
-static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
- unsigned int event)
-{
- struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-
- spin_lock(&mem->scanstat.lock);
- memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
- memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
- spin_unlock(&mem->scanstat.lock);
- return 0;
-}
-
-
static struct cftype mem_cgroup_files[] = {
{
.name = "usage_in_bytes",
@@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
.mode = S_IRUGO,
},
#endif
- {
- .name = "vmscan_stat",
- .read_map = mem_cgroup_vmscan_stat_read,
- .trigger = mem_cgroup_reset_vmscan_stat,
- },
};
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
atomic_set(&mem->refcnt, 1);
mem->move_charge_at_immigrate = 0;
mutex_init(&mem->thresholds_lock);
- spin_lock_init(&mem->scanstat.lock);
return &mem->css;
free_out:
__mem_cgroup_free(mem);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8b57173c1dd..9c51f9f58ca 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -636,7 +636,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
struct vm_area_struct *prev;
struct vm_area_struct *vma;
int err = 0;
- pgoff_t pgoff;
unsigned long vmstart;
unsigned long vmend;
@@ -649,9 +648,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);
- pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, new_pol);
+ vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ new_pol);
if (prev) {
vma = prev;
next = vma->vm_next;
@@ -1412,7 +1411,9 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
if (!err && nmask) {
- err = copy_from_user(bm, nm, alloc_size);
+ unsigned long copy_size;
+ copy_size = min_t(unsigned long, sizeof(bm), alloc_size);
+ err = copy_from_user(bm, nm, copy_size);
/* ensure entire bitmap is zeroed */
err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
err |= compat_put_bitmap(nmask, bm, nr_bits);
diff --git a/mm/slub.c b/mm/slub.c
index 9f662d70eb4..7c54fe83a90 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2377,7 +2377,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
*/
if (unlikely(!prior)) {
remove_full(s, page);
- add_partial(n, page, 0);
+ add_partial(n, page, 1);
stat(s, FREE_ADD_PARTIAL);
}
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7ef0903058e..5016f19e166 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2140,6 +2140,14 @@ struct vm_struct *alloc_vm_area(size_t size)
return NULL;
}
+ /*
+ * If the allocated address space is passed to a hypercall
+ * before being used then we cannot rely on a page fault to
+ * trigger an update of the page tables. So sync all the page
+ * tables here.
+ */
+ vmalloc_sync_all();
+
return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7719ec10dc..b55699cd906 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -105,7 +105,6 @@ struct scan_control {
/* Which cgroup do we reclaim from */
struct mem_cgroup *mem_cgroup;
- struct memcg_scanrecord *memcg_record;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
int file = is_file_lru(lru);
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
- if (!scanning_global_lru(sc))
- sc->memcg_record->nr_rotated[file] += numpages;
}
if (!pagevec_add(&pvec, page)) {
spin_unlock_irq(&zone->lru_lock);
@@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
reclaim_stat->recent_scanned[0] += *nr_anon;
reclaim_stat->recent_scanned[1] += *nr_file;
- if (!scanning_global_lru(sc)) {
- sc->memcg_record->nr_scanned[0] += *nr_anon;
- sc->memcg_record->nr_scanned[1] += *nr_file;
- }
}
/*
@@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
nr_reclaimed += shrink_page_list(&page_list, zone, sc);
}
- if (!scanning_global_lru(sc))
- sc->memcg_record->nr_freed[file] += nr_reclaimed;
-
local_irq_disable();
if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
@@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
}
reclaim_stat->recent_scanned[file] += nr_taken;
- if (!scanning_global_lru(sc))
- sc->memcg_record->nr_scanned[file] += nr_taken;
__count_zone_vm_events(PGREFILL, zone, pgscanned);
if (file)
@@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
* get_scan_ratio.
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- if (!scanning_global_lru(sc))
- sc->memcg_record->nr_rotated[file] += nr_rotated;
move_active_pages_to_lru(zone, &l_active,
LRU_ACTIVE + file * LRU_FILE);
@@ -1808,23 +1794,15 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
u64 fraction[2], denominator;
enum lru_list l;
int noswap = 0;
- int force_scan = 0;
+ bool force_scan = false;
unsigned long nr_force_scan[2];
-
- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
-
- if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
- /* kswapd does zone balancing and need to scan this zone */
- if (scanning_global_lru(sc) && current_is_kswapd())
- force_scan = 1;
- /* memcg may have small limit and need to avoid priority drop */
- if (!scanning_global_lru(sc))
- force_scan = 1;
- }
+ /* kswapd does zone balancing and needs to scan this zone */
+ if (scanning_global_lru(sc) && current_is_kswapd())
+ force_scan = true;
+ /* memcg may have small limit and need to avoid priority drop */
+ if (!scanning_global_lru(sc))
+ force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || (nr_swap_pages <= 0)) {
@@ -1837,6 +1815,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
goto out;
}
+ anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+ file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+ zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+
if (scanning_global_lru(sc)) {
free = zone_page_state(zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
@@ -2268,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
- gfp_t gfp_mask, bool noswap,
- struct zone *zone,
- struct memcg_scanrecord *rec,
- unsigned long *scanned)
+ gfp_t gfp_mask, bool noswap,
+ struct zone *zone,
+ unsigned long *nr_scanned)
{
struct scan_control sc = {
.nr_scanned = 0,
@@ -2281,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
.may_swap = !noswap,
.order = 0,
.mem_cgroup = mem,
- .memcg_record = rec,
};
- ktime_t start, end;
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2292,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
sc.may_writepage,
sc.gfp_mask);
- start = ktime_get();
/*
* NOTE: Although we can get the priority field, using it
* here is not a good idea, since it limits the pages we can scan.
@@ -2301,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
* the priority and make it zero.
*/
shrink_zone(0, zone, &sc);
- end = ktime_get();
-
- if (rec)
- rec->elapsed += ktime_to_ns(ktime_sub(end, start));
- *scanned = sc.nr_scanned;
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+ *nr_scanned = sc.nr_scanned;
return sc.nr_reclaimed;
}
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask,
- bool noswap,
- struct memcg_scanrecord *rec)
+ bool noswap)
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
- ktime_t start, end;
int nid;
struct scan_control sc = {
.may_writepage = !laptop_mode,
@@ -2328,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
.order = 0,
.mem_cgroup = mem_cont,
- .memcg_record = rec,
.nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2337,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.gfp_mask = sc.gfp_mask,
};
- start = ktime_get();
/*
* Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
* take care of from where we get pages. So the node where we start the
@@ -2352,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
sc.gfp_mask);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
- end = ktime_get();
- if (rec)
- rec->elapsed += ktime_to_ns(ktime_sub(end, start));
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 20c18b7694b..d52b13d28e8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -659,7 +659,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
}
#endif
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
@@ -788,7 +788,7 @@ const char * const vmstat_text[] = {
#endif /* CONFIG_VM_EVENTS_COUNTERS */
};
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
#ifdef CONFIG_PROC_FS