From a433658c30974fc87ba3ff52d7e4e6299762aa3d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:13 -0700 Subject: vmscan,memcg: memcg aware swap token Currently, memcg reclaim can disable swap token even if the swap token mm doesn't belong in its memory cgroup. It's slightly risky. If an admin creates very small mem-cgroup and silly guy runs contentious heavy memory pressure workload, every tasks are going to lose swap token and then system may become unresponsive. That's bad. This patch adds 'memcg' parameter into disable_swap_token(). and if the parameter doesn't match swap token, VM doesn't disable it. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Reviewed-by: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/thrash.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 19 deletions(-) (limited to 'mm/thrash.c') diff --git a/mm/thrash.c b/mm/thrash.c index 2372d4ed5dd..6cdf8651138 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -21,11 +21,31 @@ #include #include #include +#include static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; +struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +#ifdef CONFIG_CGROUP_MEM_RES_CTLR +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + memcg = try_get_mem_cgroup_from_mm(mm); + if (memcg) + css_put(mem_cgroup_css(memcg)); + + return memcg; +} +#else +static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) +{ + return NULL; +} +#endif + void grab_swap_token(struct mm_struct *mm) { int current_interval; @@ -38,40 +58,69 @@ void grab_swap_token(struct mm_struct *mm) return; /* First come first served */ - if (swap_token_mm == NULL) { - mm->token_priority = mm->token_priority + 2; - swap_token_mm = mm; + if (!swap_token_mm) + goto replace_token; + + if (mm == swap_token_mm) { + mm->token_priority += 2; goto out; } - if (mm != swap_token_mm) { - if (current_interval < mm->last_interval) - mm->token_priority++; - else { - if (likely(mm->token_priority > 0)) - mm->token_priority--; - } - /* Check if we deserve the token */ - if (mm->token_priority > swap_token_mm->token_priority) { - mm->token_priority += 2; - swap_token_mm = mm; - } - } else { - /* Token holder came in again! */ - mm->token_priority += 2; + if (current_interval < mm->last_interval) + mm->token_priority++; + else { + if (likely(mm->token_priority > 0)) + mm->token_priority--; } + /* Check if we deserve the token */ + if (mm->token_priority > swap_token_mm->token_priority) + goto replace_token; + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; spin_unlock(&swap_token_lock); + return; + +replace_token: + mm->token_priority += 2; + swap_token_mm = mm; + swap_token_memcg = swap_token_memcg_from_mm(mm); + goto out; } /* Called on process exit. */ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) + if (likely(mm == swap_token_mm)) { swap_token_mm = NULL; + swap_token_memcg = NULL; + } spin_unlock(&swap_token_lock); } + +static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) +{ + if (!a) + return true; + if (!b) + return true; + if (a == b) + return true; + return false; +} + +void disable_swap_token(struct mem_cgroup *memcg) +{ + /* memcg reclaim don't disable unrelated mm token. */ + if (match_memcg(memcg, swap_token_memcg)) { + spin_lock(&swap_token_lock); + if (match_memcg(memcg, swap_token_memcg)) { + swap_token_mm = NULL; + swap_token_memcg = NULL; + } + spin_unlock(&swap_token_lock); + } +} -- cgit v1.2.3-70-g09d2 From 83cd81a34357a632509f7491eec81e62e71d65f7 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:14 -0700 Subject: vmscan: implement swap token trace This is useful for observing swap token activity. example output: zsh-1845 [000] 598.962716: update_swap_token_priority: mm=ffff88015eaf7700 old_prio=1 new_prio=0 memtoy-1830 [001] 602.033900: update_swap_token_priority: mm=ffff880037a45880 old_prio=947 new_prio=949 memtoy-1830 [000] 602.041509: update_swap_token_priority: mm=ffff880037a45880 old_prio=949 new_prio=951 memtoy-1830 [000] 602.051959: update_swap_token_priority: mm=ffff880037a45880 old_prio=951 new_prio=953 memtoy-1830 [000] 602.052188: update_swap_token_priority: mm=ffff880037a45880 old_prio=953 new_prio=955 memtoy-1830 [001] 602.427184: put_swap_token: token_mm=ffff880037a45880 zsh-1789 [000] 602.427281: replace_swap_token: old_token_mm= (null) old_prio=0 new_token_mm=ffff88015eaf7018 new_prio=2 zsh-1789 [001] 602.433456: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=2 new_prio=4 zsh-1789 [000] 602.437613: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=4 new_prio=6 zsh-1789 [000] 602.443924: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=6 new_prio=8 zsh-1789 [000] 602.451873: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=8 new_prio=10 zsh-1789 [001] 602.462639: update_swap_token_priority: mm=ffff88015eaf7018 old_prio=10 new_prio=12 Signed-off-by: KOSAKI Motohiro Acked-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 77 +++++++++++++++++++++++++++++++++++++++++++ mm/thrash.c | 11 ++++++- 2 files changed, 87 insertions(+), 1 deletion(-) (limited to 'mm/thrash.c') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ea422aaa23e..1798e0cee2a 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "gfpflags.h" #define RECLAIM_WB_ANON 0x0001u @@ -310,6 +312,81 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive, show_reclaim_flags(__entry->reclaim_flags)) ); +TRACE_EVENT(replace_swap_token, + TP_PROTO(struct mm_struct *old_mm, + struct mm_struct *new_mm), + + TP_ARGS(old_mm, new_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, old_mm) + __field(unsigned int, old_prio) + __field(struct mm_struct*, new_mm) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->old_mm = old_mm; + __entry->old_prio = old_mm ? old_mm->token_priority : 0; + __entry->new_mm = new_mm; + __entry->new_prio = new_mm->token_priority; + ), + + TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u", + __entry->old_mm, __entry->old_prio, + __entry->new_mm, __entry->new_prio) +); + +DECLARE_EVENT_CLASS(put_swap_token_template, + TP_PROTO(struct mm_struct *swap_token_mm), + + TP_ARGS(swap_token_mm), + + TP_STRUCT__entry( + __field(struct mm_struct*, swap_token_mm) + ), + + TP_fast_assign( + __entry->swap_token_mm = swap_token_mm; + ), + + TP_printk("token_mm=%p", __entry->swap_token_mm) +); + +DEFINE_EVENT(put_swap_token_template, put_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm) +); + +DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, + TP_PROTO(struct mm_struct *swap_token_mm), + TP_ARGS(swap_token_mm), + TP_CONDITION(swap_token_mm != NULL) +); + +TRACE_EVENT_CONDITION(update_swap_token_priority, + TP_PROTO(struct mm_struct *mm, + unsigned int old_prio), + + TP_ARGS(mm, old_prio), + + TP_CONDITION(mm->token_priority != old_prio), + + TP_STRUCT__entry( + __field(struct mm_struct*, mm) + __field(unsigned int, old_prio) + __field(unsigned int, new_prio) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + ), + + TP_printk("mm=%p old_prio=%u new_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio) +); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 6cdf8651138..17d9e29e4c9 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -23,6 +23,8 @@ #include #include +#include + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; @@ -49,6 +51,7 @@ static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) void grab_swap_token(struct mm_struct *mm) { int current_interval; + unsigned int old_prio = mm->token_priority; global_faults++; @@ -63,7 +66,7 @@ void grab_swap_token(struct mm_struct *mm) if (mm == swap_token_mm) { mm->token_priority += 2; - goto out; + goto update_priority; } if (current_interval < mm->last_interval) @@ -77,6 +80,9 @@ void grab_swap_token(struct mm_struct *mm) if (mm->token_priority > swap_token_mm->token_priority) goto replace_token; +update_priority: + trace_update_swap_token_priority(mm, old_prio); + out: mm->faultstamp = global_faults; mm->last_interval = current_interval; @@ -85,6 +91,7 @@ out: replace_token: mm->token_priority += 2; + trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); goto out; @@ -95,6 +102,7 @@ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); if (likely(mm == swap_token_mm)) { + trace_put_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } @@ -118,6 +126,7 @@ void disable_swap_token(struct mem_cgroup *memcg) if (match_memcg(memcg, swap_token_memcg)) { spin_lock(&swap_token_lock); if (match_memcg(memcg, swap_token_memcg)) { + trace_disable_swap_token(swap_token_mm); swap_token_mm = NULL; swap_token_memcg = NULL; } -- cgit v1.2.3-70-g09d2 From d7911ef30cb7bec52234c2b7a5c275ac8f07905a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 15 Jun 2011 15:08:15 -0700 Subject: vmscan: implement swap token priority aging While testing for memcg aware swap token, I observed a swap token was often grabbed an intermittent running process (eg init, auditd) and they never release a token. Why? Some processes (eg init, auditd, audispd) wake up when a process exiting. And swap token can be get first page-in process when a process exiting makes no swap token owner. Thus such above intermittent running process often get a token. And currently, swap token priority is only decreased at page fault path. Then, if the process sleep immediately after to grab swap token, the swap token priority never be decreased. That's obviously undesirable. This patch implement very poor (and lightweight) priority aging. It only be affect to the above corner case and doesn't change swap tendency workload performance (eg multi process qsbench load) Signed-off-by: KOSAKI Motohiro Reviewed-by: Rik van Riel Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 20 +++++++++++++------- mm/thrash.c | 11 ++++++++++- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'mm/thrash.c') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 1798e0cee2a..b2c33bd955f 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -366,9 +366,10 @@ DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, TRACE_EVENT_CONDITION(update_swap_token_priority, TP_PROTO(struct mm_struct *mm, - unsigned int old_prio), + unsigned int old_prio, + struct mm_struct *swap_token_mm), - TP_ARGS(mm, old_prio), + TP_ARGS(mm, old_prio, swap_token_mm), TP_CONDITION(mm->token_priority != old_prio), @@ -376,16 +377,21 @@ TRACE_EVENT_CONDITION(update_swap_token_priority, __field(struct mm_struct*, mm) __field(unsigned int, old_prio) __field(unsigned int, new_prio) + __field(struct mm_struct*, swap_token_mm) + __field(unsigned int, swap_token_prio) ), TP_fast_assign( - __entry->mm = mm; - __entry->old_prio = old_prio; - __entry->new_prio = mm->token_priority; + __entry->mm = mm; + __entry->old_prio = old_prio; + __entry->new_prio = mm->token_priority; + __entry->swap_token_mm = swap_token_mm; + __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0; ), - TP_printk("mm=%p old_prio=%u new_prio=%u", - __entry->mm, __entry->old_prio, __entry->new_prio) + TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u", + __entry->mm, __entry->old_prio, __entry->new_prio, + __entry->swap_token_mm, __entry->swap_token_prio) ); #endif /* _TRACE_VMSCAN_H */ diff --git a/mm/thrash.c b/mm/thrash.c index 17d9e29e4c9..fabf2d0f516 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -25,10 +25,13 @@ #include +#define TOKEN_AGING_INTERVAL (0xFF) + static DEFINE_SPINLOCK(swap_token_lock); struct mm_struct *swap_token_mm; struct mem_cgroup *swap_token_memcg; static unsigned int global_faults; +static unsigned int last_aging; #ifdef CONFIG_CGROUP_MEM_RES_CTLR static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) @@ -64,6 +67,11 @@ void grab_swap_token(struct mm_struct *mm) if (!swap_token_mm) goto replace_token; + if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { + swap_token_mm->token_priority /= 2; + last_aging = global_faults; + } + if (mm == swap_token_mm) { mm->token_priority += 2; goto update_priority; @@ -81,7 +89,7 @@ void grab_swap_token(struct mm_struct *mm) goto replace_token; update_priority: - trace_update_swap_token_priority(mm, old_prio); + trace_update_swap_token_priority(mm, old_prio, swap_token_mm); out: mm->faultstamp = global_faults; @@ -94,6 +102,7 @@ replace_token: trace_replace_swap_token(swap_token_mm, mm); swap_token_mm = mm; swap_token_memcg = swap_token_memcg_from_mm(mm); + last_aging = global_faults; goto out; } -- cgit v1.2.3-70-g09d2