summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/controllers/memcg_test.txt41
-rw-r--r--mm/memcontrol.c7
-rw-r--r--mm/memory.c11
3 files changed, 46 insertions, 13 deletions
diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt
index c91f69b0b54..08d4d3ea0d7 100644
--- a/Documentation/controllers/memcg_test.txt
+++ b/Documentation/controllers/memcg_test.txt
@@ -1,6 +1,6 @@
Memory Resource Controller(Memcg) Implementation Memo.
-Last Updated: 2008/12/10
-Base Kernel Version: based on 2.6.28-rc7-mm.
+Last Updated: 2008/12/15
+Base Kernel Version: based on 2.6.28-rc8-mm.
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
is complex. This is a document for memcg's internal behavior.
@@ -111,9 +111,40 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
(b) If the SwapCache has been mapped by processes, it has been
charged already.
- In case (a), we charge it. In case (b), we don't charge it.
- (But racy state between (a) and (b) exists. We do check it.)
- At charging, a charge recorded in swap_cgroup is moved to page_cgroup.
+ This swap-in is one of the most complicated work. In do_swap_page(),
+ following events occur when pte is unchanged.
+
+ (1) the page (SwapCache) is looked up.
+ (2) lock_page()
+ (3) try_charge_swapin()
+ (4) reuse_swap_page() (may call delete_swap_cache())
+ (5) commit_charge_swapin()
+ (6) swap_free().
+
+ Considering following situation for example.
+
+ (A) The page has not been charged before (2) and reuse_swap_page()
+ doesn't call delete_from_swap_cache().
+ (B) The page has not been charged before (2) and reuse_swap_page()
+ calls delete_from_swap_cache().
+ (C) The page has been charged before (2) and reuse_swap_page() doesn't
+ call delete_from_swap_cache().
+ (D) The page has been charged before (2) and reuse_swap_page() calls
+ delete_from_swap_cache().
+
+ memory.usage/memsw.usage changes to this page/swp_entry will be
+ Case (A) (B) (C) (D)
+ Event
+ Before (2) 0/ 1 0/ 1 1/ 1 1/ 1
+ ===========================================
+ (3) +1/+1 +1/+1 +1/+1 +1/+1
+ (4) - 0/ 0 - -1/ 0
+ (5) 0/-1 0/ 0 -1/-1 0/ 0
+ (6) - 0/-1 - 0/-1
+ ===========================================
+ Result 1/ 1 1/ 1 1/ 1 1/ 1
+
+ In any cases, charges to this page should be 1/ 1.
4.2 Swap-out.
At swap-out, typical state transition is below.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a7ecf23150c..0ed61e27d52 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1169,10 +1169,11 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
/*
* Now swap is on-memory. This means this page may be
* counted both as mem and swap....double count.
- * Fix it by uncharging from memsw. This SwapCache is stable
- * because we're still under lock_page().
+ * Fix it by uncharging from memsw. Basically, this SwapCache is stable
+ * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
+ * may call delete_from_swap_cache() before reach here.
*/
- if (do_swap_account) {
+ if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
struct mem_cgroup *memcg;
memcg = swap_cgroup_record(ent, NULL);
diff --git a/mm/memory.c b/mm/memory.c
index e5bfbe6b594..e009ce87085 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2457,22 +2457,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * And mem_cgroup_commit_charge_swapin(), which uses the swp_entry
- * in page->private, must be called before reuse_swap_page(),
- * which may delete_from_swap_cache().
+ * Because delete_from_swap_page() may be called by reuse_swap_page(),
+ * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
+ * in page->private. In this case, a record in swap_cgroup is silently
+ * discarded at swap_free().
*/
- mem_cgroup_commit_charge_swapin(page, ptr);
inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && reuse_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
write_access = 0;
}
-
flush_icache_page(vma, page);
set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
+ /* It's better to call commit-charge after rmap is established */
+ mem_cgroup_commit_charge_swapin(page, ptr);
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))