From 425480081e936d8725f0d44b8829d699bf088c6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 13:38:36 -0400 Subject: tracing: add handler to trace_stat Currently, if a trace_stat user wants a handle to some private data, the trace_stat infrastructure does not supply a way to do that. This patch passes the trace_stat structure to the start function of the trace_stat code. Signed-off-by: Steven Rostedt --- kernel/trace/trace_stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index f71b85b22cf..f8f48d84b2c 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -85,7 +85,7 @@ static int stat_seq_init(struct tracer_stat_session *session) if (!ts->stat_cmp) ts->stat_cmp = dummy_cmp; - stat = ts->stat_start(); + stat = ts->stat_start(ts); if (!stat) goto exit; -- cgit v1.2.3-70-g09d2 From 0d64f8342de26d02451900b1aad94716fe92c4ab Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 May 2009 05:58:49 +0200 Subject: tracing/stat: replace trace_stat_session by stat_session The "trace" prefix in struct trace_stat_session type is annoying while reading the trace_stat.c file. It makes the lines longer, and is not that much useful to explain the sense of this type. Just keep "struct stat_session" for this type. [ Impact: make the code a bit more readable ] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index fdde3a4a94c..3b6816be825 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -22,7 +22,7 @@ struct trace_stat_list { }; /* A stat session is the stats output in one file */ -struct tracer_stat_session { +struct stat_session { struct list_head session_list; struct tracer_stat *ts; struct list_head stat_list; @@ -38,7 +38,7 @@ static DEFINE_MUTEX(all_stat_sessions_mutex); static struct dentry *stat_dir; -static void reset_stat_session(struct tracer_stat_session *session) +static void reset_stat_session(struct stat_session *session) { struct trace_stat_list *node, *next; @@ -48,7 +48,7 @@ static void reset_stat_session(struct tracer_stat_session *session) INIT_LIST_HEAD(&session->stat_list); } -static void destroy_session(struct tracer_stat_session *session) +static void destroy_session(struct stat_session *session) { debugfs_remove(session->file); reset_stat_session(session); @@ -71,7 +71,7 @@ static int dummy_cmp(void *p1, void *p2) * All of these copies and sorting are required on all opening * since the stats could have changed between two file sessions. */ -static int stat_seq_init(struct tracer_stat_session *session) +static int stat_seq_init(struct stat_session *session) { struct trace_stat_list *iter_entry, *new_entry; struct tracer_stat *ts = session->ts; @@ -154,7 +154,7 @@ exit_free_list: static void *stat_seq_start(struct seq_file *s, loff_t *pos) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; /* Prevent from tracer switch or stat_list modification */ mutex_lock(&session->stat_mutex); @@ -168,7 +168,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; if (p == SEQ_START_TOKEN) return seq_list_start(&session->stat_list, *pos); @@ -178,13 +178,13 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) static void stat_seq_stop(struct seq_file *s, void *p) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; mutex_unlock(&session->stat_mutex); } static int stat_seq_show(struct seq_file *s, void *v) { - struct tracer_stat_session *session = s->private; + struct stat_session *session = s->private; struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); if (v == SEQ_START_TOKEN) @@ -205,7 +205,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file) { int ret; - struct tracer_stat_session *session = inode->i_private; + struct stat_session *session = inode->i_private; ret = seq_open(file, &trace_stat_seq_ops); if (!ret) { @@ -222,7 +222,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file) */ static int tracing_stat_release(struct inode *i, struct file *f) { - struct tracer_stat_session *session = i->i_private; + struct stat_session *session = i->i_private; mutex_lock(&session->stat_mutex); reset_stat_session(session); @@ -251,7 +251,7 @@ static int tracing_stat_init(void) return 0; } -static int init_stat_file(struct tracer_stat_session *session) +static int init_stat_file(struct stat_session *session) { if (!stat_dir && tracing_stat_init()) return -ENODEV; @@ -266,7 +266,7 @@ static int init_stat_file(struct tracer_stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { - struct tracer_stat_session *session, *node, *tmp; + struct stat_session *session, *node, *tmp; int ret; if (!trace) @@ -286,7 +286,7 @@ int register_stat_tracer(struct tracer_stat *trace) mutex_unlock(&all_stat_sessions_mutex); /* Init the session */ - session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL); + session = kmalloc(sizeof(struct stat_session), GFP_KERNEL); if (!session) return -ENOMEM; @@ -312,7 +312,7 @@ int register_stat_tracer(struct tracer_stat *trace) void unregister_stat_tracer(struct tracer_stat *trace) { - struct tracer_stat_session *node, *tmp; + struct stat_session *node, *tmp; mutex_lock(&all_stat_sessions_mutex); list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { -- cgit v1.2.3-70-g09d2 From 8f184f27300f66f6dcc8296c2dae7a1fbe8429c9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 May 2009 06:24:36 +0200 Subject: tracing/stat: replace linked list by an rbtree for sorting When the stat tracing framework prepares the entries from a tracer to output them to the user, it starts by computing a linear sort through a linked list to give the entries ordered by relevance to the user. This is quite ugly and causes a small latency when we begin to read the file. This patch changes that by turning the linked list into a red-black tree. Athough the whole iteration using the start and next tracer callbacks while opening the file remain the same, it is now much more fast and scalable. The rbtree guarantees O(log(n)) insertions whereas a linked list with linear sorting brought us a O(n) despair. Now the (visible) latency has disapeared. [ Impact: kill the latency while starting to read a stat tracer file ] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 140 +++++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 40 deletions(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 3b6816be825..0bd0fc82da5 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -1,7 +1,7 @@ /* * Infrastructure for statistic tracing (histogram output). * - * Copyright (C) 2008 Frederic Weisbecker + * Copyright (C) 2008-2009 Frederic Weisbecker * * Based on the code from trace_branch.c which is * Copyright (C) 2008 Steven Rostedt @@ -10,14 +10,19 @@ #include +#include #include #include "trace_stat.h" #include "trace.h" -/* List of stat entries from a tracer */ -struct trace_stat_list { - struct list_head list; +/* + * List of stat red-black nodes from a tracer + * We use a such tree to sort quickly the stat + * entries from the tracer. + */ +struct stat_node { + struct rb_node node; void *stat; }; @@ -25,7 +30,7 @@ struct trace_stat_list { struct stat_session { struct list_head session_list; struct tracer_stat *ts; - struct list_head stat_list; + struct rb_root stat_root; struct mutex stat_mutex; struct dentry *file; }; @@ -37,15 +42,45 @@ static DEFINE_MUTEX(all_stat_sessions_mutex); /* The root directory for all stat files */ static struct dentry *stat_dir; +/* + * Iterate through the rbtree using a post order traversal path + * to release the next node. + * It won't necessary release one at each iteration + * but it will at least advance closer to the next one + * to be released. + */ +static struct rb_node *release_next(struct rb_node *node) +{ + struct stat_node *snode; + struct rb_node *parent = rb_parent(node); + + if (node->rb_left) + return node->rb_left; + else if (node->rb_right) + return node->rb_right; + else { + if (!parent) + return NULL; + if (parent->rb_left == node) + parent->rb_left = NULL; + else + parent->rb_right = NULL; + + snode = container_of(node, struct stat_node, node); + kfree(snode); + + return parent; + } +} static void reset_stat_session(struct stat_session *session) { - struct trace_stat_list *node, *next; + struct rb_node *node = session->stat_root.rb_node; - list_for_each_entry_safe(node, next, &session->stat_list, list) - kfree(node); + while (node) + node = release_next(node); - INIT_LIST_HEAD(&session->stat_list); + session->stat_root = RB_ROOT; } static void destroy_session(struct stat_session *session) @@ -56,6 +91,35 @@ static void destroy_session(struct stat_session *session) kfree(session); } +typedef int (*cmp_stat_t)(void *, void *); + +static void +insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* + * Figure out where to put new node + * This is a descendent sorting + */ + while (*new) { + struct stat_node *this; + int result; + + this = container_of(*new, struct stat_node, node); + result = cmp(data->stat, this->stat); + + parent = *new; + if (result >= 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + /* * For tracers that don't provide a stat_cmp callback. * This one will force an immediate insertion on tail of @@ -73,8 +137,9 @@ static int dummy_cmp(void *p1, void *p2) */ static int stat_seq_init(struct stat_session *session) { - struct trace_stat_list *iter_entry, *new_entry; struct tracer_stat *ts = session->ts; + struct stat_node *new_entry; + struct rb_root *root; void *stat; int ret = 0; int i; @@ -93,15 +158,13 @@ static int stat_seq_init(struct stat_session *session) * The first entry. Actually this is the second, but the first * one (the stat_list head) is pointless. */ - new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) { ret = -ENOMEM; goto exit; } - - INIT_LIST_HEAD(&new_entry->list); - - list_add(&new_entry->list, &session->stat_list); + root = &session->stat_root; + insert_stat(root, new_entry, dummy_cmp); new_entry->stat = stat; @@ -116,31 +179,17 @@ static int stat_seq_init(struct stat_session *session) if (!stat) break; - new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) { ret = -ENOMEM; goto exit_free_list; } - INIT_LIST_HEAD(&new_entry->list); new_entry->stat = stat; - list_for_each_entry_reverse(iter_entry, &session->stat_list, - list) { - - /* Insertion with a descendent sorting */ - if (ts->stat_cmp(iter_entry->stat, - new_entry->stat) >= 0) { - - list_add(&new_entry->list, &iter_entry->list); - break; - } - } - - /* The current larger value */ - if (list_empty(&new_entry->list)) - list_add(&new_entry->list, &session->stat_list); + insert_stat(root, new_entry, ts->stat_cmp); } + exit: mutex_unlock(&session->stat_mutex); return ret; @@ -155,25 +204,38 @@ exit_free_list: static void *stat_seq_start(struct seq_file *s, loff_t *pos) { struct stat_session *session = s->private; + struct rb_node *node; + int i; /* Prevent from tracer switch or stat_list modification */ mutex_lock(&session->stat_mutex); /* If we are in the beginning of the file, print the headers */ - if (!*pos && session->ts->stat_headers) + if (!*pos && session->ts->stat_headers) { + (*pos)++; return SEQ_START_TOKEN; + } - return seq_list_start(&session->stat_list, *pos); + node = rb_first(&session->stat_root); + for (i = 0; node && i < *pos; i++) + node = rb_next(node); + + (*pos)++; + + return node; } static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) { struct stat_session *session = s->private; + struct rb_node *node = p; + + (*pos)++; if (p == SEQ_START_TOKEN) - return seq_list_start(&session->stat_list, *pos); + return rb_first(&session->stat_root); - return seq_list_next(p, &session->stat_list, pos); + return rb_next(node); } static void stat_seq_stop(struct seq_file *s, void *p) @@ -185,7 +247,7 @@ static void stat_seq_stop(struct seq_file *s, void *p) static int stat_seq_show(struct seq_file *s, void *v) { struct stat_session *session = s->private; - struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); + struct stat_node *l = container_of(v, struct stat_node, node); if (v == SEQ_START_TOKEN) return session->ts->stat_headers(s); @@ -286,15 +348,13 @@ int register_stat_tracer(struct tracer_stat *trace) mutex_unlock(&all_stat_sessions_mutex); /* Init the session */ - session = kmalloc(sizeof(struct stat_session), GFP_KERNEL); + session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) return -ENOMEM; session->ts = trace; INIT_LIST_HEAD(&session->session_list); - INIT_LIST_HEAD(&session->stat_list); mutex_init(&session->stat_mutex); - session->file = NULL; ret = init_stat_file(session); if (ret) { -- cgit v1.2.3-70-g09d2 From b3dd7ba7d862707800c7ac45068f14ade2b65155 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 27 May 2009 11:04:26 +0800 Subject: tracing/stat: change dummpy_cmp() to return -1 Currently the output of trace_stat/workqueues is totally reversed: # cat /debug/tracing/trace_stat/workqueues ... 1 17 17 210 37 `-blk_unplug_work+0x0/0x57 1 3779 3779 181 11 |-cfq_kick_queue+0x0/0x2f 1 3796 3796 kblockd/1:120 ... The correct output should be: 1 3796 3796 kblockd/1:120 1 3779 3779 181 11 |-cfq_kick_queue+0x0/0x2f 1 17 17 210 37 `-blk_unplug_work+0x0/0x57 It's caused by "tracing/stat: replace linked list by an rbtree for sorting" (53059c9b67a62a3dc8c80204d3da42b9267ea5a0). dummpy_cmp() should return -1, so rb_node will always be inserted as right-most node in the rbtree, thus we sort the output in ascending order. [ Impact: fix the output of trace_stat/workqueues ] Signed-off-by: Li Zefan Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 0bd0fc82da5..5816d1aebcc 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -127,7 +127,7 @@ insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp) */ static int dummy_cmp(void *p1, void *p2) { - return 1; + return -1; } /* -- cgit v1.2.3-70-g09d2 From e16228069083a2f6b94383ac5739aea7a0f38ce4 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 27 May 2009 11:04:48 +0800 Subject: tracing/stat: remember to free root node When closing a trace_stat file, we destroy the rbtree constructed during file open, but there is memory leak that the root node is not freed. [ Impact: fix memory leak when closing a trace_stat file ] Signed-off-by: Li Zefan Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 5816d1aebcc..8030ec98dba 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -60,8 +60,8 @@ static struct rb_node *release_next(struct rb_node *node) return node->rb_right; else { if (!parent) - return NULL; - if (parent->rb_left == node) + ; + else if (parent->rb_left == node) parent->rb_left = NULL; else parent->rb_right = NULL; -- cgit v1.2.3-70-g09d2 From dbd3fbdfeecfad4e71139db05d72560c3583e2a9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 27 May 2009 11:42:46 +0800 Subject: tracing/stat: do some cleanups - remove duplicate code in stat_seq_init() - update comments to reflect the change from stat list to stat rbtree [ Impact: clean up ] Signed-off-by: Li Zefan Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 54 ++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 33 deletions(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 8030ec98dba..17f20ebdad2 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -93,10 +93,15 @@ static void destroy_session(struct stat_session *session) typedef int (*cmp_stat_t)(void *, void *); -static void -insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp) +static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp) { struct rb_node **new = &(root->rb_node), *parent = NULL; + struct stat_node *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->stat = stat; /* * Figure out where to put new node @@ -118,12 +123,13 @@ insert_stat(struct rb_root *root, struct stat_node *data, cmp_stat_t cmp) rb_link_node(&data->node, parent, new); rb_insert_color(&data->node, root); + return 0; } /* * For tracers that don't provide a stat_cmp callback. - * This one will force an immediate insertion on tail of - * the list. + * This one will force an insertion as right-most node + * in the rbtree. */ static int dummy_cmp(void *p1, void *p2) { @@ -131,15 +137,14 @@ static int dummy_cmp(void *p1, void *p2) } /* - * Initialize the stat list at each trace_stat file opening. + * Initialize the stat rbtree at each trace_stat file opening. * All of these copies and sorting are required on all opening * since the stats could have changed between two file sessions. */ static int stat_seq_init(struct stat_session *session) { struct tracer_stat *ts = session->ts; - struct stat_node *new_entry; - struct rb_root *root; + struct rb_root *root = &session->stat_root; void *stat; int ret = 0; int i; @@ -154,23 +159,12 @@ static int stat_seq_init(struct stat_session *session) if (!stat) goto exit; - /* - * The first entry. Actually this is the second, but the first - * one (the stat_list head) is pointless. - */ - new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); - if (!new_entry) { - ret = -ENOMEM; + ret = insert_stat(root, stat, ts->stat_cmp); + if (ret) goto exit; - } - root = &session->stat_root; - insert_stat(root, new_entry, dummy_cmp); - - new_entry->stat = stat; /* - * Iterate over the tracer stat entries and store them in a sorted - * list. + * Iterate over the tracer stat entries and store them in an rbtree. */ for (i = 1; ; i++) { stat = ts->stat_next(stat, i); @@ -179,22 +173,16 @@ static int stat_seq_init(struct stat_session *session) if (!stat) break; - new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); - if (!new_entry) { - ret = -ENOMEM; - goto exit_free_list; - } - - new_entry->stat = stat; - - insert_stat(root, new_entry, ts->stat_cmp); + ret = insert_stat(root, stat, ts->stat_cmp); + if (ret) + goto exit_free_rbtree; } exit: mutex_unlock(&session->stat_mutex); return ret; -exit_free_list: +exit_free_rbtree: reset_stat_session(session); mutex_unlock(&session->stat_mutex); return ret; @@ -207,7 +195,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos) struct rb_node *node; int i; - /* Prevent from tracer switch or stat_list modification */ + /* Prevent from tracer switch or rbtree modification */ mutex_lock(&session->stat_mutex); /* If we are in the beginning of the file, print the headers */ @@ -280,7 +268,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file) } /* - * Avoid consuming memory with our now useless list. + * Avoid consuming memory with our now useless rbtree. */ static int tracing_stat_release(struct inode *i, struct file *f) { -- cgit v1.2.3-70-g09d2 From 43bd1236234cacbc18d1476a9b57e7a306efddf5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 30 May 2009 04:25:30 +0200 Subject: tracing/stat: remove unappropriate safe walk on list register_stat_tracer() uses list_for_each_entry_safe to check whether a tracer is already present in the list. But we don't delete anything from the list here, so we don't need the safe version [ Impact: cleanup list use is stat tracing ] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_stat.c') diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 17f20ebdad2..c00643733f4 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -316,7 +316,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { - struct stat_session *session, *node, *tmp; + struct stat_session *session, *node; int ret; if (!trace) @@ -327,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); - list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { + list_for_each_entry(node, &all_stat_sessions, session_list) { if (node->ts == trace) { mutex_unlock(&all_stat_sessions_mutex); return -EINVAL; -- cgit v1.2.3-70-g09d2