From d144d5ee6a265823d39f75ecfed351a516295183 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Wed, 26 Nov 2008 10:29:26 +0800 Subject: ftrace: adding other non-leaving .text sections Impact: widen the scope of recordmcount.pl Besides .text section, there are three .text sections that won't be freed after kernel booting. They are: .sched.text, .spinlock.text and .kprobes.text, which contain functions we can trace. But the last section ".kprobes.text" is particular, which has been marked as "notrace", we ignore it. Thus we add other two sections. Signed-off-by: Liming Wang Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- scripts/recordmcount.pl | 2 ++ 1 file changed, 2 insertions(+) (limited to 'scripts') diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 0197e2f6b54..0b1dc9f9bb0 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -112,6 +112,8 @@ my ($arch, $bits, $objdump, $objcopy, $cc, # Acceptable sections to record. my %text_sections = ( ".text" => 1, + ".sched.text" => 1, + ".spinlock.text" => 1, ); $objdump = "objdump" if ((length $objdump) == 0); -- cgit v1.2.3-70-g09d2 From f3f47a6768a29448866da4422b6f6bee485c947f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 23 Nov 2008 16:49:58 -0800 Subject: tracing: add "power-tracer": C/P state tracer to help power optimization Impact: new "power-tracer" ftrace plugin This patch adds a C/P-state ftrace plugin that will generate detailed statistics about the C/P-states that are being used, so that we can look at detailed decisions that the C/P-state code is making, rather than the too high level "average" that we have today. An example way of using this is: mount -t debugfs none /sys/kernel/debug echo cstate > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_enabled sleep 1 echo 0 > /sys/kernel/debug/tracing/tracing_enabled cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 + arch/x86/kernel/process.c | 16 +++ include/linux/ftrace.h | 29 +++++ kernel/trace/Kconfig | 11 ++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 7 ++ kernel/trace/trace_power.c | 179 +++++++++++++++++++++++++++++ scripts/trace/power.pl | 108 +++++++++++++++++ 8 files changed, 355 insertions(+) create mode 100644 kernel/trace/trace_power.c create mode 100644 scripts/trace/power.pl (limited to 'scripts') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467..88ea02dcb62 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; + struct power_trace it; dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } + trace_power_mark(&it, POWER_PSTATE, next_perf_state); + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d..c27af49a4ed 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include unsigned long idle_halt; @@ -100,6 +101,9 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 1); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -112,6 +116,7 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(&it); } else { local_irq_enable(); /* loop is done by the caller */ @@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(ax, cx); } + trace_power_end(&it); } /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { + struct power_trace it; if (!need_resched()) { + trace_power_start(&it, POWER_CSTATE, 1); __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(&it); } else local_irq_enable(); } @@ -183,9 +195,13 @@ static void mwait_idle(void) */ static void poll_idle(void) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 0); local_irq_enable(); while (!need_resched()) cpu_relax(); + trace_power_end(&it); } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b..0df28866620 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -311,6 +311,35 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { +#ifdef CONFIG_POWER_TRACER + ktime_t stamp; + ktime_t end; + int type; + int state; +#endif +}; + +#ifdef CONFIG_POWER_TRACER +extern void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_end(struct power_trace *it); +#else +static inline void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_end(struct power_trace *it) { } +#endif + /* * Structure that defines a return function trace. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67..d151aab48ed 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -217,6 +217,17 @@ config BRANCH_TRACER Say N if unsure. +config POWER_TRACER + bool "Trace power consumption behavior" + depends on DEBUG_KERNEL + depends on X86 + select TRACING + help + This tracer helps developers to analyze and optimize the kernels + power management decisions, specifically the C-state and P-state + behavior. + + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e82..acaa06553ec 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o +obj-$(CONFIG_POWER_TRACER) += trace_power.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af..4c453778a6a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -28,6 +28,7 @@ enum trace_type { TRACE_FN_RET, TRACE_USER_STACK, TRACE_BTS, + TRACE_POWER, __TRACE_LAST_TYPE }; @@ -160,6 +161,11 @@ struct bts_entry { unsigned long to; }; +struct trace_power { + struct trace_entry ent; + struct power_trace state_data; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -266,6 +272,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c new file mode 100644 index 00000000000..a7172a352f6 --- /dev/null +++ b/kernel/trace/trace_power.c @@ -0,0 +1,179 @@ +/* + * ring buffer based C-state tracer + * + * Arjan van de Ven + * Copyright (C) 2008 Intel Corporation + * + * Much is borrowed from trace_boot.c which is + * Copyright (C) 2008 Frederic Weisbecker + * + */ + +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *power_trace; +static int __read_mostly trace_power_enabled; + + +static void start_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 1; +} + +static void stop_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 0; +} + + +static int power_trace_init(struct trace_array *tr) +{ + int cpu; + power_trace = tr; + + trace_power_enabled = 1; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + return 0; +} + +static enum print_line_t power_print_line(struct trace_iterator *iter) +{ + int ret = 0; + struct trace_entry *entry = iter->ent; + struct trace_power *field ; + struct power_trace *it; + struct trace_seq *s = &iter->seq; + struct timespec stamp; + struct timespec duration; + + trace_assign_type(field, entry); + it = &field->state_data; + stamp = ktime_to_timespec(it->stamp); + duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); + + if (entry->type == TRACE_POWER) { + if (it->type == POWER_CSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu, + duration.tv_sec, + duration.tv_nsec); + if (it->type == POWER_PSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer power_tracer __read_mostly = +{ + .name = "power", + .init = power_trace_init, + .start = start_power_trace, + .stop = stop_power_trace, + .reset = stop_power_trace, + .print_line = power_print_line, +}; + +static int init_power_trace(void) +{ + return register_tracer(&power_tracer); +} +device_initcall(init_power_trace); + +void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int level) +{ + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); +} +EXPORT_SYMBOL_GPL(trace_power_start); + + +void trace_power_end(struct power_trace *it) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + preempt_disable(); + it->end = ktime_get(); + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_end); + +void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int level) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); + preempt_disable(); + it->end = it->stamp; + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_mark); diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl new file mode 100644 index 00000000000..4f729b3501e --- /dev/null +++ b/scripts/trace/power.pl @@ -0,0 +1,108 @@ +#!/usr/bin/perl + +# Copyright 2008, Intel Corporation +# +# This file is part of the Linux kernel +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program in a file named COPYING; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA +# +# Authors: +# Arjan van de Ven + + +# +# This script turns a cstate ftrace output into a SVG graphic that shows +# historic C-state information +# +# +# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg +# + +my @styles; +my $base = 0; + +my @pstate_last; +my @pstate_level; + +$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; + + +print " \n"; +print "\n"; + +my $scale = 30000.0; +while (<>) { + my $line = $_; + if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) { + if ($base == 0) { + $base = $1; + } + my $time = $1 - $base; + $time = $time * $scale; + my $C = $2; + my $cpu = $3; + my $y = 400 * $cpu; + my $duration = $4 * $scale; + my $msec = int($4 * 100000)/100.0; + my $height = $C * 20; + $style = $styles[$C]; + + $y = $y + 140 - $height; + + $x2 = $time + 4; + $y2 = $y + 4; + + + print "\n"; + print "C$C $msec\n"; + } + if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) { + my $time = $1 - $base; + my $state = $2; + my $cpu = $3; + + if (defined($pstate_last[$cpu])) { + my $from = $pstate_last[$cpu]; + my $oldstate = $pstate_state[$cpu]; + my $duration = ($time-$from) * $scale; + + $from = $from * $scale; + my $to = $from + $duration; + my $height = 140 - ($oldstate * (140/8)); + + my $y = 400 * $cpu + 200 + $height; + my $y2 = $y+4; + my $style = $styles[8]; + + print "\n"; + print "P$oldstate (cpu $cpu)\n"; + }; + + $pstate_last[$cpu] = $time; + $pstate_state[$cpu] = $state; + } +} + + +print "\n"; -- cgit v1.2.3-70-g09d2