From 4d0c066d29f030d47d19678f8008933e67dd3b72 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 8 Jul 2011 15:39:42 -0700 Subject: memcg: fix reclaimable lru check in memcg Now, in mem_cgroup_hierarchical_reclaim(), mem_cgroup_local_usage() is used for checking whether the memcg contains reclaimable pages or not. If no pages in it, the routine skips it. But, mem_cgroup_local_usage() contains Unevictable pages and cannot handle "noswap" condition correctly. This doesn't work on a swapless system. This patch adds test_mem_cgroup_reclaimable() and replaces mem_cgroup_local_usage(). test_mem_cgroup_reclaimable() see LRU counter and returns correct answer to the caller. And this new function has "noswap" argument and can see only FILE LRU if necessary. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: fix kerneldoc layout] Signed-off-by: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Ying Han Cc: Johannes Weiner Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 107 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 76 insertions(+), 31 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ddffc74cdeb..a7a5cb1bf2c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -577,15 +577,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem, return val; } -static long mem_cgroup_local_usage(struct mem_cgroup *mem) -{ - long ret; - - ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); - ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); - return ret; -} - static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, bool charge) { @@ -1129,7 +1120,6 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, return MEM_CGROUP_ZSTAT(mz, lru); } -#ifdef CONFIG_NUMA static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg, int nid) { @@ -1141,6 +1131,17 @@ static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg, return ret; } +static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg, + int nid) +{ + unsigned long ret; + + ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) + + mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON); + return ret; +} + +#if MAX_NUMNODES > 1 static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg) { u64 total = 0; @@ -1152,17 +1153,6 @@ static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg) return total; } -static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg, - int nid) -{ - unsigned long ret; - - ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) + - mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON); - - return ret; -} - static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg) { u64 total = 0; @@ -1559,6 +1549,28 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) return ret; } +/** + * test_mem_cgroup_node_reclaimable + * @mem: the target memcg + * @nid: the node ID to be checked. + * @noswap : specify true here if the user wants flle only information. + * + * This function returns whether the specified memcg contains any + * reclaimable pages on a node. Returns true if there are any reclaimable + * pages in the node. + */ +static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, + int nid, bool noswap) +{ + if (mem_cgroup_node_nr_file_lru_pages(mem, nid)) + return true; + if (noswap || !total_swap_pages) + return false; + if (mem_cgroup_node_nr_anon_lru_pages(mem, nid)) + return true; + return false; + +} #if MAX_NUMNODES > 1 /* @@ -1580,15 +1592,8 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { - if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) || - mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE)) - continue; - - if (total_swap_pages && - (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) || - mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON))) - continue; - node_clear(nid, mem->scan_nodes); + if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) + node_clear(nid, mem->scan_nodes); } } @@ -1627,11 +1632,51 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem) return node; } +/* + * Check all nodes whether it contains reclaimable pages or not. + * For quick scan, we make use of scan_nodes. This will allow us to skip + * unused nodes. But scan_nodes is lazily updated and may not cotain + * enough new information. We need to do double check. + */ +bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) +{ + int nid; + + /* + * quick check...making use of scan_node. + * We can skip unused nodes. + */ + if (!nodes_empty(mem->scan_nodes)) { + for (nid = first_node(mem->scan_nodes); + nid < MAX_NUMNODES; + nid = next_node(nid, mem->scan_nodes)) { + + if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) + return true; + } + } + /* + * Check rest of nodes. + */ + for_each_node_state(nid, N_HIGH_MEMORY) { + if (node_isset(nid, mem->scan_nodes)) + continue; + if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) + return true; + } + return false; +} + #else int mem_cgroup_select_victim_node(struct mem_cgroup *mem) { return 0; } + +bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) +{ + return test_mem_cgroup_node_reclaimable(mem, 0, noswap); +} #endif /* @@ -1702,7 +1747,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, } } } - if (!mem_cgroup_local_usage(victim)) { + if (!mem_cgroup_reclaimable(victim, noswap)) { /* this cgroup's local usage == 0 */ css_put(&victim->css); continue; -- cgit v1.2.3-70-g09d2 From 453a9bf347f1e22a5bb3605ced43b2366921221d Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 8 Jul 2011 15:39:43 -0700 Subject: memcg: fix numa scan information update to be triggered by memory event commit 889976dbcb12 ("memcg: reclaim memory from nodes in round-robin order") adds an numa node round-robin for memcg. But the information is updated once per 10sec. This patch changes the update trigger from jiffies to memcg's event count. After this patch, numa scan information will be updated when we see 1024 events of pagein/pageout under a memcg. [akpm@linux-foundation.org: attempt to repair code layout] Signed-off-by: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Ying Han Cc: Johannes Weiner Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a7a5cb1bf2c..e013b8e57d2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -108,10 +108,12 @@ enum mem_cgroup_events_index { enum mem_cgroup_events_target { MEM_CGROUP_TARGET_THRESH, MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_TARGET_NUMAINFO, MEM_CGROUP_NTARGETS, }; #define THRESHOLDS_EVENTS_TARGET (128) #define SOFTLIMIT_EVENTS_TARGET (1024) +#define NUMAINFO_EVENTS_TARGET (1024) struct mem_cgroup_stat_cpu { long count[MEM_CGROUP_STAT_NSTATS]; @@ -237,7 +239,8 @@ struct mem_cgroup { int last_scanned_node; #if MAX_NUMNODES > 1 nodemask_t scan_nodes; - unsigned long next_scan_node_update; + atomic_t numainfo_events; + atomic_t numainfo_updating; #endif /* * Should the accounting and control be hierarchical, per subtree? @@ -680,6 +683,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) case MEM_CGROUP_TARGET_SOFTLIMIT: next = val + SOFTLIMIT_EVENTS_TARGET; break; + case MEM_CGROUP_TARGET_NUMAINFO: + next = val + NUMAINFO_EVENTS_TARGET; + break; default: return; } @@ -698,11 +704,19 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page) mem_cgroup_threshold(mem); __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); if (unlikely(__memcg_event_check(mem, - MEM_CGROUP_TARGET_SOFTLIMIT))){ + MEM_CGROUP_TARGET_SOFTLIMIT))) { mem_cgroup_update_tree(mem, page); __mem_cgroup_target_update(mem, - MEM_CGROUP_TARGET_SOFTLIMIT); + MEM_CGROUP_TARGET_SOFTLIMIT); + } +#if MAX_NUMNODES > 1 + if (unlikely(__memcg_event_check(mem, + MEM_CGROUP_TARGET_NUMAINFO))) { + atomic_inc(&mem->numainfo_events); + __mem_cgroup_target_update(mem, + MEM_CGROUP_TARGET_NUMAINFO); } +#endif } } @@ -1582,11 +1596,15 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) { int nid; - - if (time_after(mem->next_scan_node_update, jiffies)) + /* + * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET + * pagein/pageout changes since the last update. + */ + if (!atomic_read(&mem->numainfo_events)) + return; + if (atomic_inc_return(&mem->numainfo_updating) > 1) return; - mem->next_scan_node_update = jiffies + 10*HZ; /* make a nodemask where this memcg uses memory from */ mem->scan_nodes = node_states[N_HIGH_MEMORY]; @@ -1595,6 +1613,9 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) node_clear(nid, mem->scan_nodes); } + + atomic_set(&mem->numainfo_events, 0); + atomic_set(&mem->numainfo_updating, 0); } /* -- cgit v1.2.3-70-g09d2