diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 6 | ||||
-rw-r--r-- | mm/memcontrol.c | 172 | ||||
-rw-r--r-- | mm/mempolicy.c | 9 | ||||
-rw-r--r-- | mm/slub.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 8 | ||||
-rw-r--r-- | mm/vmscan.c | 66 | ||||
-rw-r--r-- | mm/vmstat.c | 4 |
7 files changed, 43 insertions, 224 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 645a080ba4d..7771871fa35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -827,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, { unsigned int i; unsigned int ret; - unsigned int nr_found; + unsigned int nr_found, nr_skip; rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, (void ***)pages, NULL, start, nr_pages); ret = 0; + nr_skip = 0; for (i = 0; i < nr_found; i++) { struct page *page; repeat: @@ -856,6 +857,7 @@ repeat: * here as an exceptional entry: so skip over it - * we only reach this from invalidate_mapping_pages(). */ + nr_skip++; continue; } @@ -876,7 +878,7 @@ repeat: * If all entries were removed before we could secure them, * try again, because callers stop trying once 0 is returned. */ - if (unlikely(!ret && nr_found)) + if (unlikely(!ret && nr_found > nr_skip)) goto restart; rcu_read_unlock(); return ret; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ebd1e86bef1..3508777837c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list { static void mem_cgroup_threshold(struct mem_cgroup *mem); static void mem_cgroup_oom_notify(struct mem_cgroup *mem); -enum { - SCAN_BY_LIMIT, - SCAN_BY_SYSTEM, - NR_SCAN_CONTEXT, - SCAN_BY_SHRINK, /* not recorded now */ -}; - -enum { - SCAN, - SCAN_ANON, - SCAN_FILE, - ROTATE, - ROTATE_ANON, - ROTATE_FILE, - FREED, - FREED_ANON, - FREED_FILE, - ELAPSED, - NR_SCANSTATS, -}; - -struct scanstat { - spinlock_t lock; - unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; - unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; -}; - -const char *scanstat_string[NR_SCANSTATS] = { - "scanned_pages", - "scanned_anon_pages", - "scanned_file_pages", - "rotated_pages", - "rotated_anon_pages", - "rotated_file_pages", - "freed_pages", - "freed_anon_pages", - "freed_file_pages", - "elapsed_ns", -}; -#define SCANSTAT_WORD_LIMIT "_by_limit" -#define SCANSTAT_WORD_SYSTEM "_by_system" -#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" - - /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide @@ -313,8 +269,7 @@ struct mem_cgroup { /* For oom notifier event fd */ struct list_head oom_notify; - /* For recording LRU-scan statistics */ - struct scanstat scanstat; + /* * Should we move charges of a task when a task is moved into this * mem_cgroup ? And what type of charges should we move ? @@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) } #endif -static void __mem_cgroup_record_scanstat(unsigned long *stats, - struct memcg_scanrecord *rec) -{ - - stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; - stats[SCAN_ANON] += rec->nr_scanned[0]; - stats[SCAN_FILE] += rec->nr_scanned[1]; - - stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; - stats[ROTATE_ANON] += rec->nr_rotated[0]; - stats[ROTATE_FILE] += rec->nr_rotated[1]; - - stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; - stats[FREED_ANON] += rec->nr_freed[0]; - stats[FREED_FILE] += rec->nr_freed[1]; - - stats[ELAPSED] += rec->elapsed; -} - -static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) -{ - struct mem_cgroup *mem; - int context = rec->context; - - if (context >= NR_SCAN_CONTEXT) - return; - - mem = rec->mem; - spin_lock(&mem->scanstat.lock); - __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); - spin_unlock(&mem->scanstat.lock); - - mem = rec->root; - spin_lock(&mem->scanstat.lock); - __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); - spin_unlock(&mem->scanstat.lock); -} - /* * Scan the hierarchy if needed to reclaim memory. We remember the last child * we reclaimed from, so that we don't end up penalizing one child extensively @@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; - struct memcg_scanrecord rec; unsigned long excess; - unsigned long scanned; + unsigned long nr_scanned; excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; @@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (!check_soft && !shrink && root_mem->memsw_is_minimum) noswap = true; - if (shrink) - rec.context = SCAN_BY_SHRINK; - else if (check_soft) - rec.context = SCAN_BY_SYSTEM; - else - rec.context = SCAN_BY_LIMIT; - - rec.root = root_mem; - while (1) { victim = mem_cgroup_select_victim(root_mem); if (victim == root_mem) { @@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, css_put(&victim->css); continue; } - rec.mem = victim; - rec.nr_scanned[0] = 0; - rec.nr_scanned[1] = 0; - rec.nr_rotated[0] = 0; - rec.nr_rotated[1] = 0; - rec.nr_freed[0] = 0; - rec.nr_freed[1] = 0; - rec.elapsed = 0; /* we use swappiness of local cgroup */ if (check_soft) { ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, - noswap, zone, &rec, &scanned); - *total_scanned += scanned; + noswap, zone, &nr_scanned); + *total_scanned += nr_scanned; } else ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, - noswap, &rec); - mem_cgroup_record_scanstat(&rec); + noswap); css_put(&victim->css); /* * At shrinking usage, we can't check we should stop here or @@ -3854,18 +3752,14 @@ try_to_free: /* try to free all pages in this cgroup */ shrink = 1; while (nr_retries && mem->res.usage > 0) { - struct memcg_scanrecord rec; int progress; if (signal_pending(current)) { ret = -EINTR; goto out; } - rec.context = SCAN_BY_SHRINK; - rec.mem = mem; - rec.root = mem; progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, - false, &rec); + false); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) } #endif /* CONFIG_NUMA */ -static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, - struct cftype *cft, - struct cgroup_map_cb *cb) -{ - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); - char string[64]; - int i; - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_LIMIT); - cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); - } - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_SYSTEM); - cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); - } - - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_LIMIT); - strcat(string, SCANSTAT_WORD_HIERARCHY); - cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); - } - for (i = 0; i < NR_SCANSTATS; i++) { - strcpy(string, scanstat_string[i]); - strcat(string, SCANSTAT_WORD_SYSTEM); - strcat(string, SCANSTAT_WORD_HIERARCHY); - cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); - } - return 0; -} - -static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, - unsigned int event) -{ - struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); - - spin_lock(&mem->scanstat.lock); - memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); - memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); - spin_unlock(&mem->scanstat.lock); - return 0; -} - - static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = { .mode = S_IRUGO, }, #endif - { - .name = "vmscan_stat", - .read_map = mem_cgroup_vmscan_stat_read, - .trigger = mem_cgroup_reset_vmscan_stat, - }, }; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP @@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) atomic_set(&mem->refcnt, 1); mem->move_charge_at_immigrate = 0; mutex_init(&mem->thresholds_lock); - spin_lock_init(&mem->scanstat.lock); return &mem->css; free_out: __mem_cgroup_free(mem); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8b57173c1dd..9c51f9f58ca 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -636,7 +636,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; - pgoff_t pgoff; unsigned long vmstart; unsigned long vmend; @@ -649,9 +648,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, - vma->anon_vma, vma->vm_file, pgoff, new_pol); + vma->anon_vma, vma->vm_file, vma->vm_pgoff, + new_pol); if (prev) { vma = prev; next = vma->vm_next; @@ -1412,7 +1411,9 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy, err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags); if (!err && nmask) { - err = copy_from_user(bm, nm, alloc_size); + unsigned long copy_size; + copy_size = min_t(unsigned long, sizeof(bm), alloc_size); + err = copy_from_user(bm, nm, copy_size); /* ensure entire bitmap is zeroed */ err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8); err |= compat_put_bitmap(nmask, bm, nr_bits); diff --git a/mm/slub.c b/mm/slub.c index 9f662d70eb4..7c54fe83a90 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2377,7 +2377,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, */ if (unlikely(!prior)) { remove_full(s, page); - add_partial(n, page, 0); + add_partial(n, page, 1); stat(s, FREE_ADD_PARTIAL); } } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7ef0903058e..5016f19e166 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2140,6 +2140,14 @@ struct vm_struct *alloc_vm_area(size_t size) return NULL; } + /* + * If the allocated address space is passed to a hypercall + * before being used then we cannot rely on a page fault to + * trigger an update of the page tables. So sync all the page + * tables here. + */ + vmalloc_sync_all(); + return area; } EXPORT_SYMBOL_GPL(alloc_vm_area); diff --git a/mm/vmscan.c b/mm/vmscan.c index b7719ec10dc..b55699cd906 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -105,7 +105,6 @@ struct scan_control { /* Which cgroup do we reclaim from */ struct mem_cgroup *mem_cgroup; - struct memcg_scanrecord *memcg_record; /* * Nodemask of nodes allowed by the caller. If NULL, all nodes @@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); reclaim_stat->recent_rotated[file] += numpages; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_rotated[file] += numpages; } if (!pagevec_add(&pvec, page)) { spin_unlock_irq(&zone->lru_lock); @@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, reclaim_stat->recent_scanned[0] += *nr_anon; reclaim_stat->recent_scanned[1] += *nr_file; - if (!scanning_global_lru(sc)) { - sc->memcg_record->nr_scanned[0] += *nr_anon; - sc->memcg_record->nr_scanned[1] += *nr_file; - } } /* @@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, nr_reclaimed += shrink_page_list(&page_list, zone, sc); } - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_freed[file] += nr_reclaimed; - local_irq_disable(); if (current_is_kswapd()) __count_vm_events(KSWAPD_STEAL, nr_reclaimed); @@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, } reclaim_stat->recent_scanned[file] += nr_taken; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, pgscanned); if (file) @@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * get_scan_ratio. */ reclaim_stat->recent_rotated[file] += nr_rotated; - if (!scanning_global_lru(sc)) - sc->memcg_record->nr_rotated[file] += nr_rotated; move_active_pages_to_lru(zone, &l_active, LRU_ACTIVE + file * LRU_FILE); @@ -1808,23 +1794,15 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, u64 fraction[2], denominator; enum lru_list l; int noswap = 0; - int force_scan = 0; + bool force_scan = false; unsigned long nr_force_scan[2]; - - anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); - - if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) { - /* kswapd does zone balancing and need to scan this zone */ - if (scanning_global_lru(sc) && current_is_kswapd()) - force_scan = 1; - /* memcg may have small limit and need to avoid priority drop */ - if (!scanning_global_lru(sc)) - force_scan = 1; - } + /* kswapd does zone balancing and needs to scan this zone */ + if (scanning_global_lru(sc) && current_is_kswapd()) + force_scan = true; + /* memcg may have small limit and need to avoid priority drop */ + if (!scanning_global_lru(sc)) + force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || (nr_swap_pages <= 0)) { @@ -1837,6 +1815,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc, goto out; } + anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON); + file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) + + zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE); + if (scanning_global_lru(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, @@ -2268,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, - gfp_t gfp_mask, bool noswap, - struct zone *zone, - struct memcg_scanrecord *rec, - unsigned long *scanned) + gfp_t gfp_mask, bool noswap, + struct zone *zone, + unsigned long *nr_scanned) { struct scan_control sc = { .nr_scanned = 0, @@ -2281,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, .may_swap = !noswap, .order = 0, .mem_cgroup = mem, - .memcg_record = rec, }; - ktime_t start, end; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -2292,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, sc.may_writepage, sc.gfp_mask); - start = ktime_get(); /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. @@ -2301,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, * the priority and make it zero. */ shrink_zone(0, zone, &sc); - end = ktime_get(); - - if (rec) - rec->elapsed += ktime_to_ns(ktime_sub(end, start)); - *scanned = sc.nr_scanned; trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); + *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; } unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask, - bool noswap, - struct memcg_scanrecord *rec) + bool noswap) { struct zonelist *zonelist; unsigned long nr_reclaimed; - ktime_t start, end; int nid; struct scan_control sc = { .may_writepage = !laptop_mode, @@ -2328,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, .mem_cgroup = mem_cont, - .memcg_record = rec, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), @@ -2337,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .gfp_mask = sc.gfp_mask, }; - start = ktime_get(); /* * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't * take care of from where we get pages. So the node where we start the @@ -2352,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, sc.gfp_mask); nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); - end = ktime_get(); - if (rec) - rec->elapsed += ktime_to_ns(ktime_sub(end, start)); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); diff --git a/mm/vmstat.c b/mm/vmstat.c index 20c18b7694b..d52b13d28e8 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -659,7 +659,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, } #endif -#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) +#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) #ifdef CONFIG_ZONE_DMA #define TEXT_FOR_DMA(xx) xx "_dma", #else @@ -788,7 +788,7 @@ const char * const vmstat_text[] = { #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; -#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */ +#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ #ifdef CONFIG_PROC_FS |