Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 471
1 file changed, 204 insertions(+), 267 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e013b8e57d2..3508777837c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,7 +35,6 @@ #include <linux/limits.h> #include <linux/mutex.h> #include <linux/rbtree.h> -#include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -246,10 +245,13 @@ struct mem_cgroup { * Should the accounting and control be hierarchical, per subtree? */ bool use_hierarchy; - atomic_t oom_lock; + + bool oom_lock; + atomic_t under_oom; + atomic_t refcnt; - unsigned int swappiness; + int swappiness; /* OOM-Killer disable */ int oom_kill_disable; @@ -636,27 +638,44 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, preempt_enable(); } -static unsigned long -mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx) +unsigned long +mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid, + unsigned int lru_mask) { struct mem_cgroup_per_zone *mz; + enum lru_list l; + unsigned long ret = 0; + + mz = mem_cgroup_zoneinfo(mem, nid, zid); + + for_each_lru(l) { + if (BIT(l) & lru_mask) + ret += MEM_CGROUP_ZSTAT(mz, l); + } + return ret; +} + +static unsigned long +mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem, + int nid, unsigned int lru_mask) +{ u64 total = 0; int zid; - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = mem_cgroup_zoneinfo(mem, nid, zid); - total += MEM_CGROUP_ZSTAT(mz, idx); - } + for (zid = 0; zid < MAX_NR_ZONES; zid++) + total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask); + return total; } -static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, - enum lru_list idx) + +static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem, + unsigned int lru_mask) { int nid; u64 total = 0; - for_each_online_node(nid) - total += mem_cgroup_get_zonestat_node(mem, nid, idx); + for_each_node_state(nid, N_HIGH_MEMORY) + total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask); return total; } @@ -1043,6 +1062,21 @@ void mem_cgroup_move_lists(struct page *page, mem_cgroup_add_lru_list(page, to); } +/* + * Checks whether given mem is same or in the root_mem's + * hierarchy subtree + */ +static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem, + struct mem_cgroup *mem) +{ + if (root_mem != mem) { + return (root_mem->use_hierarchy && + css_is_ancestor(&mem->css, &root_mem->css)); + } + + return true; +} + int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) { int ret; @@ -1062,10 +1096,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) * enabled in "curr" and "curr" is a child of "mem" in *cgroup* * hierarchy(even if use_hierarchy is disabled in "mem"). 
*/ - if (mem->use_hierarchy) - ret = css_is_ancestor(&curr->css, &mem->css); - else - ret = (curr == mem); + ret = mem_cgroup_same_or_subtree(mem, curr); css_put(&curr->css); return ret; } @@ -1077,8 +1108,8 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_ unsigned long gb; unsigned long inactive_ratio; - inactive = mem_cgroup_get_local_zonestat(memcg, LRU_INACTIVE_ANON); - active = mem_cgroup_get_local_zonestat(memcg, LRU_ACTIVE_ANON); + inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); + active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -1117,109 +1148,12 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) unsigned long active; unsigned long inactive; - inactive = mem_cgroup_get_local_zonestat(memcg, LRU_INACTIVE_FILE); - active = mem_cgroup_get_local_zonestat(memcg, LRU_ACTIVE_FILE); + inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); + active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); return (active > inactive); } -unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, - struct zone *zone, - enum lru_list lru) -{ - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); - struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); - - return MEM_CGROUP_ZSTAT(mz, lru); -} - -static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg, - int nid) -{ - unsigned long ret; - - ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) + - mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE); - - return ret; -} - -static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg, - int nid) -{ - unsigned long ret; - - ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) + - mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON); - return ret; -} - -#if MAX_NUMNODES > 1 -static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg) -{ - u64 total = 0; - int nid; - - for_each_node_state(nid, N_HIGH_MEMORY) - total += mem_cgroup_node_nr_file_lru_pages(memcg, nid); - - return total; -} - -static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg) -{ - u64 total = 0; - int nid; - - for_each_node_state(nid, N_HIGH_MEMORY) - total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid); - - return total; -} - -static unsigned long -mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid) -{ - return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE); -} - -static unsigned long -mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg) -{ - u64 total = 0; - int nid; - - for_each_node_state(nid, N_HIGH_MEMORY) - total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid); - - return total; -} - -static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid) -{ - enum lru_list l; - u64 total = 0; - - for_each_lru(l) - total += mem_cgroup_get_zonestat_node(memcg, nid, l); - - return total; -} - -static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg) -{ - u64 total = 0; - int nid; - - for_each_node_state(nid, N_HIGH_MEMORY) - total += mem_cgroup_node_nr_lru_pages(memcg, nid); - - return total; -} -#endif /* CONFIG_NUMA */ - struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) { @@ -1329,7 +1263,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) return margin >> PAGE_SHIFT; } -static unsigned int 
get_swappiness(struct mem_cgroup *memcg) +int mem_cgroup_swappiness(struct mem_cgroup *memcg) { struct cgroup *cgrp = memcg->css.cgroup; @@ -1401,10 +1335,9 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem) to = mc.to; if (!from) goto unlock; - if (from == mem || to == mem - || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css)) - || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css))) - ret = true; + + ret = mem_cgroup_same_or_subtree(mem, from) + || mem_cgroup_same_or_subtree(mem, to); unlock: spin_unlock(&mc.lock); return ret; @@ -1576,11 +1509,11 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, int nid, bool noswap) { - if (mem_cgroup_node_nr_file_lru_pages(mem, nid)) + if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE)) return true; if (noswap || !total_swap_pages) return false; - if (mem_cgroup_node_nr_anon_lru_pages(mem, nid)) + if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON)) return true; return false; @@ -1730,7 +1663,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; /* If memsw_is_minimum==1, swap-out is of-no-use. */ - if (!check_soft && root_mem->memsw_is_minimum) + if (!check_soft && !shrink && root_mem->memsw_is_minimum) noswap = true; while (1) { @@ -1776,12 +1709,11 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, /* we use swappiness of local cgroup */ if (check_soft) { ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, - noswap, get_swappiness(victim), zone, - &nr_scanned); + noswap, zone, &nr_scanned); *total_scanned += nr_scanned; } else ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, - noswap, get_swappiness(victim)); + noswap); css_put(&victim->css); /* * At shrinking usage, we can't check we should stop here or @@ -1803,38 +1735,77 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, /* * Check OOM-Killer is already running under our hierarchy. * If someone is running, return false. + * Has to be called with memcg_oom_lock */ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) { - int x, lock_count = 0; - struct mem_cgroup *iter; + struct mem_cgroup *iter, *failed = NULL; + bool cond = true; - for_each_mem_cgroup_tree(iter, mem) { - x = atomic_inc_return(&iter->oom_lock); - lock_count = max(x, lock_count); + for_each_mem_cgroup_tree_cond(iter, mem, cond) { + if (iter->oom_lock) { + /* + * this subtree of our hierarchy is already locked + * so we cannot give a lock. 
+ */ + failed = iter; + cond = false; + } else + iter->oom_lock = true; } - if (lock_count == 1) + if (!failed) return true; + + /* + * OK, we failed to lock the whole subtree so we have to clean up + * what we set up to the failing subtree + */ + cond = true; + for_each_mem_cgroup_tree_cond(iter, mem, cond) { + if (iter == failed) { + cond = false; + continue; + } + iter->oom_lock = false; + } return false; } +/* + * Has to be called with memcg_oom_lock + */ static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) { struct mem_cgroup *iter; + for_each_mem_cgroup_tree(iter, mem) + iter->oom_lock = false; + return 0; +} + +static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem) +{ + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, mem) + atomic_inc(&iter->under_oom); +} + +static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) +{ + struct mem_cgroup *iter; + /* * When a new child is created while the hierarchy is under oom, * mem_cgroup_oom_lock() may not be called. We have to use * atomic_add_unless() here. */ for_each_mem_cgroup_tree(iter, mem) - atomic_add_unless(&iter->oom_lock, -1, 0); - return 0; + atomic_add_unless(&iter->under_oom, -1, 0); } - -static DEFINE_MUTEX(memcg_oom_mutex); +static DEFINE_SPINLOCK(memcg_oom_lock); static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); struct oom_wait_info { @@ -1845,25 +1816,20 @@ struct oom_wait_info { static int memcg_oom_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg; + struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg, + *oom_wait_mem; struct oom_wait_info *oom_wait_info; oom_wait_info = container_of(wait, struct oom_wait_info, wait); + oom_wait_mem = oom_wait_info->mem; - if (oom_wait_info->mem == wake_mem) - goto wakeup; - /* if no hierarchy, no match */ - if (!oom_wait_info->mem->use_hierarchy || !wake_mem->use_hierarchy) - return 0; /* * Both of oom_wait_info->mem and wake_mem are stable under us. * Then we can use css_is_ancestor without taking care of RCU. 
*/ - if (!css_is_ancestor(&oom_wait_info->mem->css, &wake_mem->css) && - !css_is_ancestor(&wake_mem->css, &oom_wait_info->mem->css)) + if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem) + && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem)) return 0; - -wakeup: return autoremove_wake_function(wait, mode, sync, arg); } @@ -1875,7 +1841,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem) static void memcg_oom_recover(struct mem_cgroup *mem) { - if (mem && atomic_read(&mem->oom_lock)) + if (mem && atomic_read(&mem->under_oom)) memcg_wakeup_oom(mem); } @@ -1893,8 +1859,10 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) owait.wait.private = current; INIT_LIST_HEAD(&owait.wait.task_list); need_to_kill = true; + mem_cgroup_mark_under_oom(mem); + /* At first, try to OOM lock hierarchy under mem.*/ - mutex_lock(&memcg_oom_mutex); + spin_lock(&memcg_oom_lock); locked = mem_cgroup_oom_lock(mem); /* * Even if signal_pending(), we can't quit charge() loop without @@ -1906,7 +1874,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) need_to_kill = false; if (locked) mem_cgroup_oom_notify(mem); - mutex_unlock(&memcg_oom_mutex); + spin_unlock(&memcg_oom_lock); if (need_to_kill) { finish_wait(&memcg_oom_waitq, &owait.wait); @@ -1915,10 +1883,13 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) schedule(); finish_wait(&memcg_oom_waitq, &owait.wait); } - mutex_lock(&memcg_oom_mutex); - mem_cgroup_oom_unlock(mem); + spin_lock(&memcg_oom_lock); + if (locked) + mem_cgroup_oom_unlock(mem); memcg_wakeup_oom(mem); - mutex_unlock(&memcg_oom_mutex); + spin_unlock(&memcg_oom_lock); + + mem_cgroup_unmark_under_oom(mem); if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) return false; @@ -2079,59 +2050,70 @@ static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) } /* - * Tries to drain stocked charges in other cpus. This function is asynchronous - * and just put a work per cpu for draining localy on each cpu. Caller can - * expects some charges will be back to res_counter later but cannot wait for - * it. + * Drains all per-CPU charge caches for given root_mem resp. subtree + * of the hierarchy under it. sync flag says whether we should block + * until the work is done. */ -static void drain_all_stock_async(struct mem_cgroup *root_mem) +static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) { int cpu, curcpu; - /* - * If someone calls draining, avoid adding more kworker runs. - */ - if (!mutex_trylock(&percpu_charge_mutex)) - return; + /* Notify other cpus that system-wide "drain" is running */ get_online_cpus(); - /* - * Get a hint for avoiding draining charges on the current cpu, - * which must be exhausted by our charging. It is not required that - * this be a precise check, so we use raw_smp_processor_id() instead of - * getcpu()/putcpu(). 
- */ - curcpu = raw_smp_processor_id(); + curcpu = get_cpu(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *mem; - if (cpu == curcpu) - continue; - mem = stock->cached; - if (!mem) + if (!mem || !stock->nr_pages) continue; - if (mem != root_mem) { - if (!root_mem->use_hierarchy) - continue; - /* check whether "mem" is under tree of "root_mem" */ - if (!css_is_ancestor(&mem->css, &root_mem->css)) - continue; + if (!mem_cgroup_same_or_subtree(root_mem, mem)) + continue; + if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { + if (cpu == curcpu) + drain_local_stock(&stock->work); + else + schedule_work_on(cpu, &stock->work); } - if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) - schedule_work_on(cpu, &stock->work); } + put_cpu(); + + if (!sync) + goto out; + + for_each_online_cpu(cpu) { + struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); + if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) + flush_work(&stock->work); + } +out: put_online_cpus(); +} + +/* + * Tries to drain stocked charges in other cpus. This function is asynchronous + * and just put a work per cpu for draining localy on each cpu. Caller can + * expects some charges will be back to res_counter later but cannot wait for + * it. + */ +static void drain_all_stock_async(struct mem_cgroup *root_mem) +{ + /* + * If someone calls draining, avoid adding more kworker runs. + */ + if (!mutex_trylock(&percpu_charge_mutex)) + return; + drain_all_stock(root_mem, false); mutex_unlock(&percpu_charge_mutex); - /* We don't wait for flush_work */ } /* This is a synchronous drain interface. */ -static void drain_all_stock_sync(void) +static void drain_all_stock_sync(struct mem_cgroup *root_mem) { /* called when force_empty is called */ mutex_lock(&percpu_charge_mutex); - schedule_on_each_cpu(drain_local_stock); + drain_all_stock(root_mem, true); mutex_unlock(&percpu_charge_mutex); } @@ -2784,30 +2766,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; - /* - * Corner case handling. This is called from add_to_page_cache() - * in usual. But some FS (shmem) precharges this page before calling it - * and call add_to_page_cache() with GFP_NOWAIT. - * - * For GFP_NOWAIT case, the page may be pre-charged before calling - * add_to_page_cache(). (See shmem.c) check it here and avoid to call - * charge twice. (It works but has to pay a bit larger cost.) - * And when the page is SwapCache, it should take swap information - * into account. This is under lock_page() now. - */ - if (!(gfp_mask & __GFP_WAIT)) { - struct page_cgroup *pc; - - pc = lookup_page_cgroup(page); - if (!pc) - return 0; - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - unlock_page_cgroup(pc); - return 0; - } - unlock_page_cgroup(pc); - } if (unlikely(!mm)) mm = &init_mm; @@ -3397,31 +3355,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } -/* - * A call to try to shrink memory usage on charge failure at shmem's swapin. - * Calling hierarchical_reclaim is not enough because we should update - * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. - * Moreover considering hierarchy, we should reclaim from the mem_over_limit, - * not from the memcg which this page would be charged to. - * try_charge_swapin does all of these works properly. 
- */ -int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - struct mem_cgroup *mem; - int ret; - - if (mem_cgroup_disabled()) - return 0; - - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); - if (!ret) - mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - - return ret; -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -3780,7 +3713,7 @@ move_account: goto out; /* This is for making all *used* pages to be on LRU. */ lru_add_drain_all(); - drain_all_stock_sync(); + drain_all_stock_sync(mem); ret = 0; mem_cgroup_start_move(mem); for_each_node_state(node, N_HIGH_MEMORY) { @@ -3826,7 +3759,7 @@ try_to_free: goto out; } progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, - false, get_swappiness(mem)); + false); if (!progress) { nr_retries--; /* maybe some writeback is necessary */ @@ -4152,15 +4085,15 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) s->stat[MCS_PGMAJFAULT] += val; /* per zone stat */ - val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); + val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_ANON); + val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON)); s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; - val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_FILE); + val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE)); s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_get_local_zonestat(mem, LRU_ACTIVE_FILE); + val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE)); s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; - val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); + val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE)); s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; } @@ -4182,35 +4115,37 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg) struct cgroup *cont = m->private; struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); - total_nr = mem_cgroup_nr_lru_pages(mem_cont); + total_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL); seq_printf(m, "total=%lu", total_nr); for_each_node_state(nid, N_HIGH_MEMORY) { - node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid); + node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, LRU_ALL); seq_printf(m, " N%d=%lu", nid, node_nr); } seq_putc(m, '\n'); - file_nr = mem_cgroup_nr_file_lru_pages(mem_cont); + file_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_FILE); seq_printf(m, "file=%lu", file_nr); for_each_node_state(nid, N_HIGH_MEMORY) { - node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid); + node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, + LRU_ALL_FILE); seq_printf(m, " N%d=%lu", nid, node_nr); } seq_putc(m, '\n'); - anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont); + anon_nr = mem_cgroup_nr_lru_pages(mem_cont, LRU_ALL_ANON); seq_printf(m, "anon=%lu", anon_nr); for_each_node_state(nid, N_HIGH_MEMORY) { - node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid); + node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, + LRU_ALL_ANON); seq_printf(m, " N%d=%lu", nid, node_nr); } seq_putc(m, '\n'); - unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont); + unevictable_nr = mem_cgroup_nr_lru_pages(mem_cont, BIT(LRU_UNEVICTABLE)); seq_printf(m, "unevictable=%lu", unevictable_nr); for_each_node_state(nid, N_HIGH_MEMORY) { - node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont, - nid); + 
node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid, + BIT(LRU_UNEVICTABLE)); seq_printf(m, " N%d=%lu", nid, node_nr); } seq_putc(m, '\n'); @@ -4288,7 +4223,7 @@ static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); - return get_swappiness(memcg); + return mem_cgroup_swappiness(memcg); } static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, @@ -4578,15 +4513,15 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp, if (!event) return -ENOMEM; - mutex_lock(&memcg_oom_mutex); + spin_lock(&memcg_oom_lock); event->eventfd = eventfd; list_add(&event->list, &memcg->oom_notify); /* already in OOM ? */ - if (atomic_read(&memcg->oom_lock)) + if (atomic_read(&memcg->under_oom)) eventfd_signal(eventfd, 1); - mutex_unlock(&memcg_oom_mutex); + spin_unlock(&memcg_oom_lock); return 0; } @@ -4600,7 +4535,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, BUG_ON(type != _OOM_TYPE); - mutex_lock(&memcg_oom_mutex); + spin_lock(&memcg_oom_lock); list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) { if (ev->eventfd == eventfd) { @@ -4609,7 +4544,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, } } - mutex_unlock(&memcg_oom_mutex); + spin_unlock(&memcg_oom_lock); } static int mem_cgroup_oom_control_read(struct cgroup *cgrp, @@ -4619,7 +4554,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp, cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable); - if (atomic_read(&mem->oom_lock)) + if (atomic_read(&mem->under_oom)) cb->fill(cb, "under_oom", 1); else cb->fill(cb, "under_oom", 0); @@ -4997,7 +4932,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) INIT_LIST_HEAD(&mem->oom_notify); if (parent) - mem->swappiness = get_swappiness(parent); + mem->swappiness = mem_cgroup_swappiness(parent); atomic_set(&mem->refcnt, 1); mem->move_charge_at_immigrate = 0; mutex_init(&mem->thresholds_lock); @@ -5181,15 +5116,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - if (!mapping_cap_swap_backed(mapping)) { /* normal file */ - page = find_get_page(mapping, pgoff); - } else { /* shmem/tmpfs file. we should take account of swap too. */ - swp_entry_t ent; - mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + page = find_get_page(mapping, pgoff); + +#ifdef CONFIG_SWAP + /* shmem/tmpfs may report page out on swap: account for that too. */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) - entry->val = ent.val; + *entry = swap; + page = find_get_page(&swapper_space, swap.val); } - +#endif return page; } |
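
Notes on the main patterns this diff introduces, with illustrative userspace sketches rather than the kernel implementation.

The statistics hunks replace the per-LRU-type counters (mem_cgroup_nr_file_lru_pages() and friends) with a single helper that selects LRU lists via a bitmask. Below is a minimal sketch of that lru_mask pattern, assuming a toy per-node/per-zone counter array; the array layout and the function names zone_nr_lru_pages()/node_nr_lru_pages() are stand-ins for MEM_CGROUP_ZSTAT() and the real mem_cgroup_*_nr_lru_pages() helpers, not kernel API.

/*
 * Sketch of the lru_mask pattern: one counting helper, callers pass a
 * bitmask of the LRU lists they care about (LRU_ALL_FILE, LRU_ALL, ...).
 * The zone layout below is a stand-in, not mem_cgroup_per_zone.
 */
#include <stdio.h>

#define BIT(nr)		(1UL << (nr))

enum lru_list {
	LRU_INACTIVE_ANON,
	LRU_ACTIVE_ANON,
	LRU_INACTIVE_FILE,
	LRU_ACTIVE_FILE,
	LRU_UNEVICTABLE,
	NR_LRU_LISTS
};

#define LRU_ALL_FILE	(BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
#define LRU_ALL_ANON	(BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL		(LRU_ALL_FILE | LRU_ALL_ANON | BIT(LRU_UNEVICTABLE))

#define MAX_NR_ZONES	3
#define NR_NODES	2

/* stand-in for MEM_CGROUP_ZSTAT(mz, l): per-node, per-zone, per-LRU counts */
static unsigned long zstat[NR_NODES][MAX_NR_ZONES][NR_LRU_LISTS];

static unsigned long zone_nr_lru_pages(int nid, int zid, unsigned int lru_mask)
{
	unsigned long ret = 0;
	int l;

	for (l = 0; l < NR_LRU_LISTS; l++)
		if (BIT(l) & lru_mask)
			ret += zstat[nid][zid][l];
	return ret;
}

static unsigned long node_nr_lru_pages(int nid, unsigned int lru_mask)
{
	unsigned long total = 0;
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++)
		total += zone_nr_lru_pages(nid, zid, lru_mask);
	return total;
}

int main(void)
{
	zstat[0][1][LRU_ACTIVE_FILE] = 100;
	zstat[1][2][LRU_INACTIVE_ANON] = 40;

	printf("file pages on node 0: %lu\n", node_nr_lru_pages(0, LRU_ALL_FILE));
	printf("all pages on node 1:  %lu\n", node_nr_lru_pages(1, LRU_ALL));
	return 0;
}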
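
Several hunks (task_in_mem_cgroup(), mem_cgroup_under_move(), the OOM wait queue and the per-CPU stock drain) now share the new mem_cgroup_same_or_subtree() helper instead of open-coding "same group, or an ancestor when use_hierarchy is set". The sketch below assumes plain parent pointers and a hand-rolled ancestor walk in place of css_is_ancestor(); all names are illustrative.

/*
 * Sketch of the same-or-subtree check: 'mem' qualifies if it is 'root'
 * itself, or (hierarchy enabled on 'root') a descendant of it.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct memcg {
	const char *name;
	struct memcg *parent;
	bool use_hierarchy;
};

static bool is_ancestor(const struct memcg *ancestor, const struct memcg *child)
{
	/* walk up from the child; a group is not its own ancestor here */
	for (child = child->parent; child; child = child->parent)
		if (child == ancestor)
			return true;
	return false;
}

static bool same_or_subtree(const struct memcg *root, const struct memcg *mem)
{
	if (root == mem)
		return true;
	return root->use_hierarchy && is_ancestor(root, mem);
}

int main(void)
{
	struct memcg root = { "root", NULL,  true };
	struct memcg a    = { "A",    &root, true };
	struct memcg a1   = { "A/a1", &a,    true };
	struct memcg b    = { "B",    &root, true };

	printf("A/a1 under A: %d\n", same_or_subtree(&a, &a1));	/* 1 */
	printf("B under A:    %d\n", same_or_subtree(&a, &b));	/* 0 */
	return 0;
}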
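
The OOM hunks turn oom_lock from a counter into a per-group boolean that is taken across the whole subtree, with an explicit rollback when part of the subtree is already locked, while the separate under_oom counter keeps serving waiters and notifiers. The sketch below shows only the trylock/rollback protocol; a flat array stands in for for_each_mem_cgroup_tree(), and the memcg_oom_lock spinlock that serializes the real walk is omitted.

/*
 * Sketch of the hierarchical OOM trylock: set oom_lock on every group
 * visited until one is already locked, then unwind only the groups
 * locked before the failure point and report failure.
 */
#include <stdbool.h>
#include <stdio.h>

struct group {
	const char *name;
	bool oom_lock;
};

/* pretend these are the groups visited by for_each_mem_cgroup_tree() */
static struct group tree[] = {
	{ "root", false }, { "A", false }, { "A/a1", false }, { "B", false },
};
#define NR_GROUPS (sizeof(tree) / sizeof(tree[0]))

static bool oom_trylock(void)
{
	struct group *failed = NULL;
	size_t i;

	for (i = 0; i < NR_GROUPS; i++) {
		if (tree[i].oom_lock) {
			/* this subtree is already locked: stop and remember it */
			failed = &tree[i];
			break;
		}
		tree[i].oom_lock = true;
	}
	if (!failed)
		return true;

	/* clean up what we set up before hitting the failing group */
	for (i = 0; i < NR_GROUPS && &tree[i] != failed; i++)
		tree[i].oom_lock = false;
	return false;
}

static void oom_unlock(void)
{
	size_t i;

	for (i = 0; i < NR_GROUPS; i++)
		tree[i].oom_lock = false;
}

int main(void)
{
	printf("first lock:   %s\n", oom_trylock() ? "ok" : "busy");
	printf("second lock:  %s\n", oom_trylock() ? "ok" : "busy");
	oom_unlock();
	printf("after unlock: %s\n", oom_trylock() ? "ok" : "busy");
	return 0;
}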